From 377edd82890af15ec6f683b443ca415ae5deff95 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 21 Feb 2018 09:51:18 -0800 Subject: [PATCH 01/19] Added decorator to XlrdTests --- pandas/tests/io/test_excel.py | 17 ++++------------- pandas/util/_test_decorators.py | 6 +++++- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 86cee54665781..65e86a70e2951 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -146,17 +146,11 @@ class ReadingTestsBase(SharedItems): # # Base class for test cases to run with different Excel readers. # To add a reader test, define the following: - # 1. A check_skip function that skips your tests if your reader isn't - # installed. - # 2. Add a property ext, which is the file extension that your reader + # 1. Add a property ext, which is the file extension that your reader # reades from. (needs to start with '.' so it's a valid path) - # 3. Add a property engine_name, which is the name of the reader class. + # 2. Add a property engine_name, which is the name of the reader class. # For the reader this is not used for anything at the moment. - def setup_method(self, method): - self.check_skip() - super(ReadingTestsBase, self).setup_method(method) - def test_usecols_int(self): dfref = self.get_csv_refdf('test1') @@ -567,6 +561,7 @@ def test_sheet_name_both_raises(self): self.get_exceldf('test1', sheetname='Sheet1', sheet_name='Sheet1') +@td.skip_if_no('xlrd', '0.9') class XlrdTests(ReadingTestsBase): """ This is the base class for the xlrd tests, and 3 different file formats @@ -589,7 +584,6 @@ def test_excel_read_buffer(self): def test_read_xlrd_Book(self): _skip_if_no_xlwt() - import xlrd df = self.frame with ensure_clean('.xls') as pth: df.to_excel(pth, "SheetA") @@ -713,9 +707,9 @@ def tdf(sheetname): tm.assert_frame_equal(dfs[s], dfs_returned[s]) def test_reader_seconds(self): - # Test reading times with and without milliseconds. 
GH5945. import xlrd + # Test reading times with and without milliseconds. GH5945. if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"): # Xlrd >= 0.9.3 can handle Excel milliseconds. expected = DataFrame.from_dict({"Time": [time(1, 2, 3), @@ -1061,19 +1055,16 @@ def test_read_excel_squeeze(self): class TestXlsReaderTests(XlrdTests): ext = '.xls' engine_name = 'xlrd' - check_skip = staticmethod(_skip_if_no_xlrd) class TestXlsxReaderTests(XlrdTests): ext = '.xlsx' engine_name = 'xlrd' - check_skip = staticmethod(_skip_if_no_xlrd) class TestXlsmReaderTests(XlrdTests): ext = '.xlsm' engine_name = 'xlrd' - check_skip = staticmethod(_skip_if_no_xlrd) class ExcelWriterBase(SharedItems): diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 0fd5648739e5c..b2745ab5eec77 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -57,7 +57,11 @@ def safe_import(mod_name, min_version=None): return mod else: import sys - version = getattr(sys.modules[mod_name], '__version__') + try: + version = getattr(sys.modules[mod_name], '__version__') + except AttributeError: + # xlrd uses a capitalized attribute name + version = getattr(sys.modules[mod_name], '__VERSION__') if version: from distutils.version import LooseVersion if LooseVersion(version) >= LooseVersion(min_version): From c8b7f28644d20648dca15a032b64a31a6a4c6b42 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 21 Feb 2018 10:13:13 -0800 Subject: [PATCH 02/19] Replaced subclasses with parametrization --- pandas/tests/io/test_excel.py | 265 ++++++++++++++++------------------ 1 file changed, 122 insertions(+), 143 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 65e86a70e2951..7823745caaf09 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -103,7 +103,7 @@ def get_csv_refdf(self, basename): dfref = read_csv(pref, index_col=0, parse_dates=True, engine='python') return dfref - def 
get_excelfile(self, basename): + def get_excelfile(self, basename, ext): """ Return test data ExcelFile instance. Test data path is defined by pandas.util.testing.get_data_path() @@ -119,9 +119,9 @@ def get_excelfile(self, basename): excel : io.excel.ExcelFile """ - return ExcelFile(os.path.join(self.dirpath, basename + self.ext)) + return ExcelFile(os.path.join(self.dirpath, basename + ext)) - def get_exceldf(self, basename, *args, **kwds): + def get_exceldf(self, basename, ext, *args, **kwds): """ Return test data DataFrame. Test data path is defined by pandas.util.testing.get_data_path() @@ -137,30 +137,23 @@ def get_exceldf(self, basename, *args, **kwds): df : DataFrame """ - pth = os.path.join(self.dirpath, basename + self.ext) + pth = os.path.join(self.dirpath, basename + ext) return read_excel(pth, *args, **kwds) class ReadingTestsBase(SharedItems): # This is based on ExcelWriterBase - # - # Base class for test cases to run with different Excel readers. - # To add a reader test, define the following: - # 1. Add a property ext, which is the file extension that your reader - # reades from. (needs to start with '.' so it's a valid path) - # 2. Add a property engine_name, which is the name of the reader class. - # For the reader this is not used for anything at the moment. 
- def test_usecols_int(self): + def test_usecols_int(self, ext): dfref = self.get_csv_refdf('test1') dfref = dfref.reindex(columns=['A', 'B', 'C']) - df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, usecols=3) - df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, + df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols=3) + df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, usecols=3) with tm.assert_produces_warning(FutureWarning): - df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], + df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, parse_cols=3) # TODO add index to xls file) @@ -168,17 +161,17 @@ def test_usecols_int(self): tm.assert_frame_equal(df2, dfref, check_names=False) tm.assert_frame_equal(df3, dfref, check_names=False) - def test_usecols_list(self): + def test_usecols_list(self, ext): dfref = self.get_csv_refdf('test1') dfref = dfref.reindex(columns=['B', 'C']) - df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, + df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols=[0, 2, 3]) - df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, + df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, usecols=[0, 2, 3]) with tm.assert_produces_warning(FutureWarning): - df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], + df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, parse_cols=[0, 2, 3]) # TODO add index to xls file) @@ -186,18 +179,18 @@ def test_usecols_list(self): tm.assert_frame_equal(df2, dfref, check_names=False) tm.assert_frame_equal(df3, dfref, check_names=False) - def test_usecols_str(self): + def test_usecols_str(self, ext): dfref = self.get_csv_refdf('test1') df1 = dfref.reindex(columns=['A', 'B', 'C']) - df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, + df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols='A:D') - df3 = self.get_exceldf('test1', 'Sheet2', 
skiprows=[1], index_col=0, + df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, usecols='A:D') with tm.assert_produces_warning(FutureWarning): - df4 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], + df4 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, parse_cols='A:D') # TODO add index to xls, read xls ignores index name ? @@ -206,37 +199,37 @@ def test_usecols_str(self): tm.assert_frame_equal(df4, df1, check_names=False) df1 = dfref.reindex(columns=['B', 'C']) - df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, + df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols='A,C,D') - df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, + df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, usecols='A,C,D') # TODO add index to xls file tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) df1 = dfref.reindex(columns=['B', 'C']) - df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, + df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols='A,C:D') - df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, + df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, usecols='A,C:D') tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) - def test_excel_stop_iterator(self): + def test_excel_stop_iterator(self, ext): - parsed = self.get_exceldf('test2', 'Sheet1') + parsed = self.get_exceldf('test2', ext, 'Sheet1') expected = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1']) tm.assert_frame_equal(parsed, expected) - def test_excel_cell_error_na(self): + def test_excel_cell_error_na(self, ext): - parsed = self.get_exceldf('test3', 'Sheet1') + parsed = self.get_exceldf('test3', ext, 'Sheet1') expected = DataFrame([[np.nan]], columns=['Test']) tm.assert_frame_equal(parsed, expected) - def test_excel_passes_na(self): + def 
test_excel_passes_na(self, ext): - excel = self.get_excelfile('test4') + excel = self.get_excelfile('test4', ext) parsed = read_excel(excel, 'Sheet1', keep_default_na=False, na_values=['apple']) @@ -251,7 +244,7 @@ def test_excel_passes_na(self): tm.assert_frame_equal(parsed, expected) # 13967 - excel = self.get_excelfile('test5') + excel = self.get_excelfile('test5', ext) parsed = read_excel(excel, 'Sheet1', keep_default_na=False, na_values=['apple']) @@ -265,9 +258,9 @@ def test_excel_passes_na(self): columns=['Test']) tm.assert_frame_equal(parsed, expected) - def test_excel_table_sheet_by_index(self): + def test_excel_table_sheet_by_index(self, ext): - excel = self.get_excelfile('test1') + excel = self.get_excelfile('test1', ext) dfref = self.get_csv_refdf('test1') df1 = read_excel(excel, 0, index_col=0) @@ -294,21 +287,21 @@ def test_excel_table_sheet_by_index(self): with pytest.raises(xlrd.XLRDError): read_excel(excel, 'asdf') - def test_excel_table(self): + def test_excel_table(self, ext): dfref = self.get_csv_refdf('test1') - df1 = self.get_exceldf('test1', 'Sheet1', index_col=0) - df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0) + df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0) + df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0) # TODO add index to file tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) - df3 = self.get_exceldf('test1', 'Sheet1', index_col=0, + df3 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, skipfooter=1) tm.assert_frame_equal(df3, df1.iloc[:-1]) - def test_reader_special_dtypes(self): + def test_reader_special_dtypes(self, ext): expected = DataFrame.from_dict(OrderedDict([ ("IntCol", [1, 2, -3, 4, 0]), @@ -324,36 +317,36 @@ def test_reader_special_dtypes(self): basename = 'test_types' # should read in correctly and infer types - actual = self.get_exceldf(basename, 'Sheet1') + actual = self.get_exceldf(basename, ext, 
'Sheet1') tm.assert_frame_equal(actual, expected) # if not coercing number, then int comes in as float float_expected = expected.copy() float_expected["IntCol"] = float_expected["IntCol"].astype(float) float_expected.loc[float_expected.index[1], "Str2Col"] = 3.0 - actual = self.get_exceldf(basename, 'Sheet1', convert_float=False) + actual = self.get_exceldf(basename, ext, 'Sheet1', convert_float=False) tm.assert_frame_equal(actual, float_expected) # check setting Index (assuming xls and xlsx are the same here) for icol, name in enumerate(expected.columns): - actual = self.get_exceldf(basename, 'Sheet1', index_col=icol) + actual = self.get_exceldf(basename, ext, 'Sheet1', index_col=icol) exp = expected.set_index(name) tm.assert_frame_equal(actual, exp) # convert_float and converters should be different but both accepted expected["StrCol"] = expected["StrCol"].apply(str) actual = self.get_exceldf( - basename, 'Sheet1', converters={"StrCol": str}) + basename, ext, 'Sheet1', converters={"StrCol": str}) tm.assert_frame_equal(actual, expected) no_convert_float = float_expected.copy() no_convert_float["StrCol"] = no_convert_float["StrCol"].apply(str) - actual = self.get_exceldf(basename, 'Sheet1', convert_float=False, + actual = self.get_exceldf(basename, ext, 'Sheet1', convert_float=False, converters={"StrCol": str}) tm.assert_frame_equal(actual, no_convert_float) # GH8212 - support for converters and missing values - def test_reader_converters(self): + def test_reader_converters(self, ext): basename = 'test_converters' @@ -372,13 +365,13 @@ def test_reader_converters(self): # should read in correctly and set types of single cells (not array # dtypes) - actual = self.get_exceldf(basename, 'Sheet1', converters=converters) + actual = self.get_exceldf(basename, ext, 'Sheet1', converters=converters) tm.assert_frame_equal(actual, expected) - def test_reader_dtype(self): + def test_reader_dtype(self, ext): # GH 8212 basename = 'testdtype' - actual = self.get_exceldf(basename) 
+ actual = self.get_exceldf(basename, ext) expected = DataFrame({ 'a': [1, 2, 3, 4], @@ -389,7 +382,7 @@ def test_reader_dtype(self): tm.assert_frame_equal(actual, expected) - actual = self.get_exceldf(basename, + actual = self.get_exceldf(basename, ext, dtype={'a': 'float64', 'b': 'float32', 'c': str}) @@ -400,14 +393,14 @@ def test_reader_dtype(self): tm.assert_frame_equal(actual, expected) with pytest.raises(ValueError): - actual = self.get_exceldf(basename, dtype={'d': 'int64'}) + actual = self.get_exceldf(basename, ext, dtype={'d': 'int64'}) - def test_reading_all_sheets(self): + def test_reading_all_sheets(self, ext): # Test reading all sheetnames by setting sheetname to None, # Ensure a dict is returned. # See PR #9450 basename = 'test_multisheet' - dfs = self.get_exceldf(basename, sheet_name=None) + dfs = self.get_exceldf(basename, ext, sheet_name=None) # ensure this is not alphabetical to test order preservation expected_keys = ['Charlie', 'Alpha', 'Beta'] tm.assert_contains_all(expected_keys, dfs.keys()) @@ -415,7 +408,7 @@ def test_reading_all_sheets(self): # Ensure sheet order is preserved assert expected_keys == list(dfs.keys()) - def test_reading_multiple_specific_sheets(self): + def test_reading_multiple_specific_sheets(self, ext): # Test reading specific sheetnames by specifying a mixed list # of integers and strings, and confirm that duplicated sheet # references (positions/names) are removed properly. @@ -424,32 +417,32 @@ def test_reading_multiple_specific_sheets(self): basename = 'test_multisheet' # Explicitly request duplicates. Only the set should be returned. 
expected_keys = [2, 'Charlie', 'Charlie'] - dfs = self.get_exceldf(basename, sheet_name=expected_keys) + dfs = self.get_exceldf(basename, ext, sheet_name=expected_keys) expected_keys = list(set(expected_keys)) tm.assert_contains_all(expected_keys, dfs.keys()) assert len(expected_keys) == len(dfs.keys()) - def test_reading_all_sheets_with_blank(self): + def test_reading_all_sheets_with_blank(self, ext): # Test reading all sheetnames by setting sheetname to None, # In the case where some sheets are blank. # Issue #11711 basename = 'blank_with_header' - dfs = self.get_exceldf(basename, sheet_name=None) + dfs = self.get_exceldf(basename, ext, sheet_name=None) expected_keys = ['Sheet1', 'Sheet2', 'Sheet3'] tm.assert_contains_all(expected_keys, dfs.keys()) # GH6403 - def test_read_excel_blank(self): - actual = self.get_exceldf('blank', 'Sheet1') + def test_read_excel_blank(self, ext): + actual = self.get_exceldf('blank', ext, 'Sheet1') tm.assert_frame_equal(actual, DataFrame()) - def test_read_excel_blank_with_header(self): + def test_read_excel_blank_with_header(self, ext): expected = DataFrame(columns=['col_1', 'col_2']) - actual = self.get_exceldf('blank_with_header', 'Sheet1') + actual = self.get_exceldf('blank_with_header', ext, 'Sheet1') tm.assert_frame_equal(actual, expected) # GH 12292 : error when read one empty column from excel file - def test_read_one_empty_col_no_header(self): + def test_read_one_empty_col_no_header(self, ext): _skip_if_no_xlwt() _skip_if_no_openpyxl() @@ -459,7 +452,7 @@ def test_read_one_empty_col_no_header(self): ["", 3, 300], ["", 4, 400]] ) - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: df.to_excel(path, 'no_header', index=False, header=False) actual_header_none = read_excel( path, @@ -478,7 +471,7 @@ def test_read_one_empty_col_no_header(self): tm.assert_frame_equal(actual_header_none, expected) tm.assert_frame_equal(actual_header_zero, expected) - def test_read_one_empty_col_with_header(self): + def 
test_read_one_empty_col_with_header(self, ext): _skip_if_no_xlwt() _skip_if_no_openpyxl() @@ -488,7 +481,7 @@ def test_read_one_empty_col_with_header(self): ["", 3, 300], ["", 4, 400]] ) - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: df.to_excel(path, 'with_header', index=False, header=True) actual_header_none = read_excel( path, @@ -508,7 +501,7 @@ def test_read_one_empty_col_with_header(self): expected_header_zero = DataFrame(columns=[0], dtype='int64') tm.assert_frame_equal(actual_header_zero, expected_header_zero) - def test_set_column_names_in_parameter(self): + def test_set_column_names_in_parameter(self, ext): _skip_if_no_xlwt() _skip_if_no_openpyxl() @@ -517,7 +510,7 @@ def test_set_column_names_in_parameter(self): refdf = pd.DataFrame([[1, 'foo'], [2, 'bar'], [3, 'baz']], columns=['a', 'b']) - with ensure_clean(self.ext) as pth: + with ensure_clean(ext) as pth: with ExcelWriter(pth) as writer: refdf.to_excel(writer, 'Data_no_head', header=False, index=False) @@ -534,43 +527,44 @@ def test_set_column_names_in_parameter(self): tm.assert_frame_equal(xlsdf_no_head, refdf) tm.assert_frame_equal(xlsdf_with_head, refdf) - def test_date_conversion_overflow(self): + def test_date_conversion_overflow(self, ext): # GH 10001 : pandas.ExcelFile ignore parse_dates=False expected = pd.DataFrame([[pd.Timestamp('2016-03-12'), 'Marc Johnson'], [pd.Timestamp('2016-03-16'), 'Jack Black'], [1e+20, 'Timothy Brown']], columns=['DateColWithBigInt', 'StringCol']) - result = self.get_exceldf('testdateoverflow') + result = self.get_exceldf('testdateoverflow', ext) tm.assert_frame_equal(result, expected) - def test_sheet_name_and_sheetname(self): + def test_sheet_name_and_sheetname(self, ext): # GH10559: Minor improvement: Change "sheet_name" to "sheetname" # GH10969: DOC: Consistent var names (sheetname vs sheet_name) # GH12604: CLN GH10559 Rename sheetname variable to sheet_name dfref = self.get_csv_refdf('test1') - df1 = self.get_exceldf('test1', 
sheet_name='Sheet1') # doc + df1 = self.get_exceldf('test1', ext, sheet_name='Sheet1') # doc with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - df2 = self.get_exceldf('test1', sheetname='Sheet1') # bkwrd compat + df2 = self.get_exceldf('test1', ext, sheetname='Sheet1') # bkwrd compat tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) - def test_sheet_name_both_raises(self): + def test_sheet_name_both_raises(self, ext): with tm.assert_raises_regex(TypeError, "Cannot specify both"): - self.get_exceldf('test1', sheetname='Sheet1', sheet_name='Sheet1') + self.get_exceldf('test1', ext, sheetname='Sheet1', sheet_name='Sheet1') @td.skip_if_no('xlrd', '0.9') -class XlrdTests(ReadingTestsBase): +@pytest.mark.parametrize("ext", ['.xls', '.xlsx', '.xlsm']) +class TestXlrdReader(ReadingTestsBase): """ This is the base class for the xlrd tests, and 3 different file formats are supported: xls, xlsx, xlsm """ - def test_excel_read_buffer(self): + def test_excel_read_buffer(self, ext): - pth = os.path.join(self.dirpath, 'test1' + self.ext) + pth = os.path.join(self.dirpath, 'test1' + ext) expected = read_excel(pth, 'Sheet1', index_col=0) with open(pth, 'rb') as f: actual = read_excel(f, 'Sheet1', index_col=0) @@ -581,7 +575,7 @@ def test_excel_read_buffer(self): actual = read_excel(xls, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) - def test_read_xlrd_Book(self): + def test_read_xlrd_Book(self, ext): _skip_if_no_xlwt() df = self.frame @@ -597,39 +591,39 @@ def test_read_xlrd_Book(self): tm.assert_frame_equal(df, result) @tm.network - def test_read_from_http_url(self): + def test_read_from_http_url(self, ext): url = ('https://raw.github.com/pandas-dev/pandas/master/' - 'pandas/tests/io/data/test1' + self.ext) + 'pandas/tests/io/data/test1' + ext) url_table = read_excel(url) - local_table = self.get_exceldf('test1') + local_table = self.get_exceldf('test1', ext) 
tm.assert_frame_equal(url_table, local_table) - def test_read_from_s3_url(self): + @td.skip_if_no('s3fs') + def test_read_from_s3_url(self, ext): boto3 = pytest.importorskip('boto3') - pytest.importorskip('s3fs') moto = pytest.importorskip('moto') with moto.mock_s3(): conn = boto3.resource("s3", region_name="us-east-1") conn.create_bucket(Bucket="pandas-test") - file_name = os.path.join(self.dirpath, 'test1' + self.ext) + file_name = os.path.join(self.dirpath, 'test1' + ext) with open(file_name, 'rb') as f: - conn.Bucket("pandas-test").put_object(Key="test1" + self.ext, + conn.Bucket("pandas-test").put_object(Key="test1" + ext, Body=f) - url = ('s3://pandas-test/test1' + self.ext) + url = ('s3://pandas-test/test1' + ext) url_table = read_excel(url) - local_table = self.get_exceldf('test1') + local_table = self.get_exceldf('test1', ext) tm.assert_frame_equal(url_table, local_table) @pytest.mark.slow - def test_read_from_file_url(self): + def test_read_from_file_url(self, ext): # FILE if sys.version_info[:2] < (2, 6): pytest.skip("file:// not supported with Python < 2.6") - localtable = os.path.join(self.dirpath, 'test1' + self.ext) + localtable = os.path.join(self.dirpath, 'test1' + ext) local_table = read_excel(localtable) try: @@ -643,37 +637,37 @@ def test_read_from_file_url(self): tm.assert_frame_equal(url_table, local_table) @td.skip_if_no('pathlib') - def test_read_from_pathlib_path(self): + def test_read_from_pathlib_path(self, ext): # GH12655 from pathlib import Path - str_path = os.path.join(self.dirpath, 'test1' + self.ext) + str_path = os.path.join(self.dirpath, 'test1' + ext) expected = read_excel(str_path, 'Sheet1', index_col=0) - path_obj = Path(self.dirpath, 'test1' + self.ext) + path_obj = Path(self.dirpath, 'test1' + ext) actual = read_excel(path_obj, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) @td.skip_if_no('py.path') - def test_read_from_py_localpath(self): + def test_read_from_py_localpath(self, ext): # GH12655 from py.path 
import local as LocalPath - str_path = os.path.join(self.dirpath, 'test1' + self.ext) + str_path = os.path.join(self.dirpath, 'test1' + ext) expected = read_excel(str_path, 'Sheet1', index_col=0) abs_dir = os.path.abspath(self.dirpath) - path_obj = LocalPath(abs_dir).join('test1' + self.ext) + path_obj = LocalPath(abs_dir).join('test1' + ext) actual = read_excel(path_obj, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) - def test_reader_closes_file(self): + def test_reader_closes_file(self, ext): - pth = os.path.join(self.dirpath, 'test1' + self.ext) + pth = os.path.join(self.dirpath, 'test1' + ext) f = open(pth, 'rb') with ExcelFile(f) as xlsx: # parses okay @@ -681,7 +675,7 @@ def test_reader_closes_file(self): assert f.closed - def test_creating_and_reading_multiple_sheets(self): + def test_creating_and_reading_multiple_sheets(self, ext): # Test reading multiple sheets, from a runtime created excel file # with multiple sheets. # See PR #9450 @@ -698,7 +692,7 @@ def tdf(sheetname): dfs = [tdf(s) for s in sheets] dfs = dict(zip(sheets, dfs)) - with ensure_clean(self.ext) as pth: + with ensure_clean(ext) as pth: with ExcelWriter(pth) as ew: for sheetname, df in iteritems(dfs): df.to_excel(ew, sheetname) @@ -706,7 +700,7 @@ def tdf(sheetname): for s in sheets: tm.assert_frame_equal(dfs[s], dfs_returned[s]) - def test_reader_seconds(self): + def test_reader_seconds(self, ext): import xlrd # Test reading times with and without milliseconds. GH5945. 
@@ -737,16 +731,16 @@ def test_reader_seconds(self): time(16, 37, 1), time(18, 20, 54)]}) - actual = self.get_exceldf('times_1900', 'Sheet1') + actual = self.get_exceldf('times_1900', ext, 'Sheet1') tm.assert_frame_equal(actual, expected) - actual = self.get_exceldf('times_1904', 'Sheet1') + actual = self.get_exceldf('times_1904', ext, 'Sheet1') tm.assert_frame_equal(actual, expected) - def test_read_excel_multiindex(self): + def test_read_excel_multiindex(self, ext): # GH 4679 mi = MultiIndex.from_product([['foo', 'bar'], ['a', 'b']]) - mi_file = os.path.join(self.dirpath, 'testmultiindex' + self.ext) + mi_file = os.path.join(self.dirpath, 'testmultiindex' + ext) expected = DataFrame([[1, 2.5, pd.Timestamp('2015-01-01'), True], [2, 3.5, pd.Timestamp('2015-01-02'), False], @@ -800,7 +794,7 @@ def test_read_excel_multiindex(self): header=[0, 1], skiprows=2) tm.assert_frame_equal(actual, expected) - def test_read_excel_multiindex_empty_level(self): + def test_read_excel_multiindex_empty_level(self, ext): # GH 12453 _skip_if_no_xlsxwriter() with ensure_clean('.xlsx') as path: @@ -840,7 +834,7 @@ def test_read_excel_multiindex_empty_level(self): actual = pd.read_excel(path, header=[0, 1]) tm.assert_frame_equal(actual, expected) - def test_excel_multindex_roundtrip(self): + def test_excel_multindex_roundtrip(self, ext): # GH 4679 _skip_if_no_xlsxwriter() with ensure_clean('.xlsx') as pth: @@ -885,9 +879,9 @@ def test_excel_multindex_roundtrip(self): tm.assert_frame_equal( df, act, check_names=check_names) - def test_excel_old_index_format(self): + def test_excel_old_index_format(self, ext): # see gh-4679 - filename = 'test_index_name_pre17' + self.ext + filename = 'test_index_name_pre17' + ext in_file = os.path.join(self.dirpath, filename) # We detect headers to determine if index names exist, so @@ -946,20 +940,20 @@ def test_excel_old_index_format(self): actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1]) tm.assert_frame_equal(actual, expected, 
check_names=False) - def test_read_excel_bool_header_arg(self): + def test_read_excel_bool_header_arg(self, ext): # GH 6114 for arg in [True, False]: with pytest.raises(TypeError): - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), + pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), header=arg) - def test_read_excel_chunksize(self): + def test_read_excel_chunksize(self, ext): # GH 8011 with pytest.raises(NotImplementedError): - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), + pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), chunksize=100) - def test_read_excel_parse_dates(self): + def test_read_excel_parse_dates(self, ext): # GH 11544, 12051 _skip_if_no_openpyxl() _skip_if_no_xlwt() # for df2.to_excel @@ -970,7 +964,7 @@ def test_read_excel_parse_dates(self): df2 = df.copy() df2['date_strings'] = df2['date_strings'].dt.strftime('%m/%d/%Y') - with ensure_clean(self.ext) as pth: + with ensure_clean(ext) as pth: df2.to_excel(pth) res = read_excel(pth) @@ -989,10 +983,10 @@ def test_read_excel_parse_dates(self): date_parser=dateparser, index_col=0) tm.assert_frame_equal(df, res) - def test_read_excel_skiprows_list(self): + def test_read_excel_skiprows_list(self, ext): # GH 4903 actual = pd.read_excel(os.path.join(self.dirpath, - 'testskiprows' + self.ext), + 'testskiprows' + ext), 'skiprows_list', skiprows=[0, 2]) expected = DataFrame([[1, 2.5, pd.Timestamp('2015-01-01'), True], [2, 3.5, pd.Timestamp('2015-01-02'), False], @@ -1002,40 +996,40 @@ def test_read_excel_skiprows_list(self): tm.assert_frame_equal(actual, expected) actual = pd.read_excel(os.path.join(self.dirpath, - 'testskiprows' + self.ext), + 'testskiprows' + ext), 'skiprows_list', skiprows=np.array([0, 2])) tm.assert_frame_equal(actual, expected) - def test_read_excel_nrows(self): + def test_read_excel_nrows(self, ext): # GH 16645 num_rows_to_pull = 5 - actual = pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), + actual = 
pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), nrows=num_rows_to_pull) expected = pd.read_excel(os.path.join(self.dirpath, - 'test1' + self.ext)) + 'test1' + ext)) expected = expected[:num_rows_to_pull] tm.assert_frame_equal(actual, expected) - def test_read_excel_nrows_greater_than_nrows_in_file(self): + def test_read_excel_nrows_greater_than_nrows_in_file(self, ext): # GH 16645 expected = pd.read_excel(os.path.join(self.dirpath, - 'test1' + self.ext)) + 'test1' + ext)) num_records_in_file = len(expected) num_rows_to_pull = num_records_in_file + 10 - actual = pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), + actual = pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), nrows=num_rows_to_pull) tm.assert_frame_equal(actual, expected) - def test_read_excel_nrows_non_integer_parameter(self): + def test_read_excel_nrows_non_integer_parameter(self, ext): # GH 16645 msg = "'nrows' must be an integer >=0" with tm.assert_raises_regex(ValueError, msg): - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), + pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), nrows='5') - def test_read_excel_squeeze(self): + def test_read_excel_squeeze(self, ext): # GH 12157 - f = os.path.join(self.dirpath, 'test_squeeze' + self.ext) + f = os.path.join(self.dirpath, 'test_squeeze' + ext) actual = pd.read_excel(f, 'two_columns', index_col=0, squeeze=True) expected = pd.Series([2, 3, 4], [4, 5, 6], name='b') @@ -1052,21 +1046,6 @@ def test_read_excel_squeeze(self): tm.assert_series_equal(actual, expected) -class TestXlsReaderTests(XlrdTests): - ext = '.xls' - engine_name = 'xlrd' - - -class TestXlsxReaderTests(XlrdTests): - ext = '.xlsx' - engine_name = 'xlrd' - - -class TestXlsmReaderTests(XlrdTests): - ext = '.xlsm' - engine_name = 'xlrd' - - class ExcelWriterBase(SharedItems): # Base class for test cases to run with different Excel writers. 
# To add a writer test, define the following: From 44fa2f932ffd3f89eec564d67c502913959a9915 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 21 Feb 2018 10:34:38 -0800 Subject: [PATCH 03/19] Moved xlrd skip to SharedItems --- pandas/tests/io/test_excel.py | 86 +---------------------------------- 1 file changed, 1 insertion(+), 85 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 7823745caaf09..5aca057b2db9b 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -28,16 +28,6 @@ from pandas.util.testing import ensure_clean, makeCustomDataframe as mkdf -def _skip_if_no_xlrd(): - try: - import xlrd - ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2])) - if ver < (0, 9): - pytest.skip('xlrd < 0.9, skipping') - except ImportError: - pytest.skip('xlrd not installed, skipping') - - def _skip_if_no_xlwt(): try: import xlwt # NOQA @@ -59,12 +49,6 @@ def _skip_if_no_xlsxwriter(): pytest.skip('xlsxwriter not installed, skipping') -def _skip_if_no_excelsuite(): - _skip_if_no_xlrd() - _skip_if_no_xlwt() - _skip_if_no_openpyxl() - - _seriesd = tm.getSeriesData() _tsd = tm.getTimeSeriesData() _frame = DataFrame(_seriesd)[:10] @@ -74,6 +58,7 @@ def _skip_if_no_excelsuite(): _mixed_frame['foo'] = 'bar' +@td.skip_if_no('xlrd', '0.9') class SharedItems(object): def setup_method(self, method): @@ -554,7 +539,6 @@ def test_sheet_name_both_raises(self, ext): self.get_exceldf('test1', ext, sheetname='Sheet1', sheet_name='Sheet1') -@td.skip_if_no('xlrd', '0.9') @pytest.mark.parametrize("ext", ['.xls', '.xlsx', '.xlsm']) class TestXlrdReader(ReadingTestsBase): """ @@ -1069,7 +1053,6 @@ def teardown_method(self, method): set_option(self.option_name, self.prev_engine) def test_excel_sheet_by_name_raise(self): - _skip_if_no_xlrd() import xlrd with ensure_clean(self.ext) as pth: @@ -1083,8 +1066,6 @@ def test_excel_sheet_by_name_raise(self): read_excel(xl, '0') def test_excelwriter_contextmanager(self): - 
_skip_if_no_xlrd() - with ensure_clean(self.ext) as pth: with ExcelWriter(pth) as writer: self.frame.to_excel(writer, 'Data1') @@ -1097,8 +1078,6 @@ def test_excelwriter_contextmanager(self): tm.assert_frame_equal(found_df2, self.frame2) def test_roundtrip(self): - _skip_if_no_xlrd() - with ensure_clean(self.ext) as path: self.frame['A'][:5] = nan @@ -1147,8 +1126,6 @@ def test_roundtrip(self): tm.assert_frame_equal(s.to_frame(), recons) def test_mixed(self): - _skip_if_no_xlrd() - with ensure_clean(self.ext) as path: self.mixed_frame.to_excel(path, 'test1') reader = ExcelFile(path) @@ -1156,8 +1133,6 @@ def test_mixed(self): tm.assert_frame_equal(self.mixed_frame, recons) def test_tsframe(self): - _skip_if_no_xlrd() - df = tm.makeTimeDataFrame()[:5] with ensure_clean(self.ext) as path: @@ -1167,7 +1142,6 @@ def test_tsframe(self): tm.assert_frame_equal(df, recons) def test_basics_with_nan(self): - _skip_if_no_xlrd() with ensure_clean(self.ext) as path: self.frame['A'][:5] = nan self.frame.to_excel(path, 'test1') @@ -1176,8 +1150,6 @@ def test_basics_with_nan(self): self.frame.to_excel(path, 'test1', index=False) def test_int_types(self): - _skip_if_no_xlrd() - for np_type in (np.int8, np.int16, np.int32, np.int64): with ensure_clean(self.ext) as path: @@ -1201,8 +1173,6 @@ def test_int_types(self): check_column_type=False) def test_float_types(self): - _skip_if_no_xlrd() - for np_type in (np.float16, np.float32, np.float64): with ensure_clean(self.ext) as path: # Test np.float values read come back as float. @@ -1213,8 +1183,6 @@ def test_float_types(self): tm.assert_frame_equal(frame, recons, check_dtype=False) def test_bool_types(self): - _skip_if_no_xlrd() - for np_type in (np.bool8, np.bool_): with ensure_clean(self.ext) as path: # Test np.bool values read come back as float. 
@@ -1225,8 +1193,6 @@ def test_bool_types(self): tm.assert_frame_equal(frame, recons) def test_inf_roundtrip(self): - _skip_if_no_xlrd() - frame = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) with ensure_clean(self.ext) as path: frame.to_excel(path, 'test1') @@ -1235,8 +1201,6 @@ def test_inf_roundtrip(self): tm.assert_frame_equal(frame, recons) def test_sheets(self): - _skip_if_no_xlrd() - with ensure_clean(self.ext) as path: self.frame['A'][:5] = nan @@ -1260,8 +1224,6 @@ def test_sheets(self): assert 'test2' == reader.sheet_names[1] def test_colaliases(self): - _skip_if_no_xlrd() - with ensure_clean(self.ext) as path: self.frame['A'][:5] = nan @@ -1280,8 +1242,6 @@ def test_colaliases(self): tm.assert_frame_equal(xp, rs) def test_roundtrip_indexlabels(self): - _skip_if_no_xlrd() - with ensure_clean(self.ext) as path: self.frame['A'][:5] = nan @@ -1342,8 +1302,6 @@ def test_roundtrip_indexlabels(self): tm.assert_frame_equal(df, recons, check_less_precise=True) def test_excel_roundtrip_indexname(self): - _skip_if_no_xlrd() - df = DataFrame(np.random.randn(10, 4)) df.index.name = 'foo' @@ -1358,8 +1316,6 @@ def test_excel_roundtrip_indexname(self): assert result.index.name == 'foo' def test_excel_roundtrip_datetime(self): - _skip_if_no_xlrd() - # datetime.date, not sure what to test here exactly tsf = self.tsframe.copy() with ensure_clean(self.ext) as path: @@ -1372,7 +1328,6 @@ def test_excel_roundtrip_datetime(self): # GH4133 - excel output format strings def test_excel_date_datetime_format(self): - _skip_if_no_xlrd() df = DataFrame([[date(2014, 1, 31), date(1999, 9, 24)], [datetime(1998, 5, 26, 23, 33, 4), @@ -1411,8 +1366,6 @@ def test_excel_date_datetime_format(self): def test_to_excel_interval_no_labels(self): # GH19242 - test writing Interval without labels - _skip_if_no_xlrd() - with ensure_clean(self.ext) as path: frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) @@ -1426,8 +1379,6 @@ def test_to_excel_interval_no_labels(self): 
def test_to_excel_interval_labels(self): # GH19242 - test writing Interval with labels - _skip_if_no_xlrd() - with ensure_clean(self.ext) as path: frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) @@ -1443,8 +1394,6 @@ def test_to_excel_interval_labels(self): def test_to_excel_timedelta(self): # GH 19242, GH9155 - test writing timedelta to xls - _skip_if_no_xlrd() - with ensure_clean('.xls') as path: frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), columns=['A'], @@ -1460,8 +1409,6 @@ def test_to_excel_timedelta(self): tm.assert_frame_equal(expected, recons) def test_to_excel_periodindex(self): - _skip_if_no_xlrd() - frame = self.tsframe xp = frame.resample('M', kind='period').mean() @@ -1473,8 +1420,6 @@ def test_to_excel_periodindex(self): tm.assert_frame_equal(xp, rs.to_period('M')) def test_to_excel_multiindex(self): - _skip_if_no_xlrd() - frame = self.frame arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, @@ -1493,8 +1438,6 @@ def test_to_excel_multiindex(self): # GH13511 def test_to_excel_multiindex_nan_label(self): - _skip_if_no_xlrd() - frame = pd.DataFrame({'A': [None, 2, 3], 'B': [10, 20, 30], 'C': np.random.sample(3)}) @@ -1509,8 +1452,6 @@ def test_to_excel_multiindex_nan_label(self): # sure they are handled correctly for either setting of # merge_cells def test_to_excel_multiindex_cols(self): - _skip_if_no_xlrd() - frame = self.frame arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, @@ -1537,8 +1478,6 @@ def test_to_excel_multiindex_cols(self): tm.assert_frame_equal(frame, df) def test_to_excel_multiindex_dates(self): - _skip_if_no_xlrd() - # try multiindex with dates tsframe = self.tsframe.copy() new_index = [tsframe.index, np.arange(len(tsframe.index))] @@ -1555,8 +1494,6 @@ def test_to_excel_multiindex_dates(self): assert recons.index.names == ('time', 'foo') def test_to_excel_multiindex_no_write_index(self): - 
_skip_if_no_xlrd() - # Test writing and re-reading a MI witout the index. GH 5616. # Initial non-MI frame. @@ -1580,8 +1517,6 @@ def test_to_excel_multiindex_no_write_index(self): tm.assert_frame_equal(frame1, frame3) def test_to_excel_float_format(self): - _skip_if_no_xlrd() - df = DataFrame([[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], index=['A', 'B'], columns=['X', 'Y', 'Z']) @@ -1597,8 +1532,6 @@ def test_to_excel_float_format(self): tm.assert_frame_equal(rs, xp) def test_to_excel_output_encoding(self): - _skip_if_no_xlrd() - # avoid mixed inferred_type df = DataFrame([[u'\u0192', u'\u0193', u'\u0194'], [u'\u0195', u'\u0196', u'\u0197']], @@ -1612,7 +1545,6 @@ def test_to_excel_output_encoding(self): tm.assert_frame_equal(result, df) def test_to_excel_unicode_filename(self): - _skip_if_no_xlrd() with ensure_clean(u('\u0192u.') + self.ext) as filename: try: f = open(filename, 'wb') @@ -1735,8 +1667,6 @@ def test_to_excel_unicode_filename(self): # os.remove(filename) def test_excel_010_hemstring(self): - _skip_if_no_xlrd() - if self.merge_cells: pytest.skip('Skip tests for merged MI format.') @@ -1790,8 +1720,6 @@ def roundtrip(df, header=True, parser_hdr=0, index=True): def test_excel_010_hemstring_raises_NotImplementedError(self): # This test was failing only for j>1 and header=False, # So I reproduced a simple test. - _skip_if_no_xlrd() - if self.merge_cells: pytest.skip('Skip tests for merged MI format.') @@ -1818,8 +1746,6 @@ def roundtrip2(df, header=True, parser_hdr=0, index=True): def test_duplicated_columns(self): # Test for issue #5235 - _skip_if_no_xlrd() - with ensure_clean(self.ext) as path: write_frame = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]]) colnames = ['A', 'B', 'B'] @@ -1847,8 +1773,6 @@ def test_duplicated_columns(self): def test_swapped_columns(self): # Test for issue #5427. 
- _skip_if_no_xlrd() - with ensure_clean(self.ext) as path: write_frame = DataFrame({'A': [1, 1, 1], 'B': [2, 2, 2]}) @@ -1861,8 +1785,6 @@ def test_swapped_columns(self): def test_invalid_columns(self): # 10982 - _skip_if_no_xlrd() - with ensure_clean(self.ext) as path: write_frame = DataFrame({'A': [1, 1, 1], 'B': [2, 2, 2]}) @@ -1942,8 +1864,6 @@ def test_comment_emptyline(self): def test_datetimes(self): # Test writing and reading datetimes. For issue #9139. (xref #9185) - _skip_if_no_xlrd() - datetimes = [datetime(2013, 1, 13, 1, 2, 3), datetime(2013, 1, 13, 2, 45, 56), datetime(2013, 1, 13, 4, 29, 49), @@ -1965,8 +1885,6 @@ def test_datetimes(self): # GH7074 def test_bytes_io(self): - _skip_if_no_xlrd() - bio = BytesIO() df = DataFrame(np.random.randn(10, 2)) # pass engine explicitly as there is no file path to infer from @@ -1979,8 +1897,6 @@ def test_bytes_io(self): # GH8188 def test_write_lists_dict(self): - _skip_if_no_xlrd() - df = DataFrame({'mixed': ['a', ['b', 'c'], {'d': 'e', 'f': 2}], 'numeric': [1, 2, 3.0], 'str': ['apple', 'banana', 'cherry']}) From 069d700861dff527e799392048296b7817045aa3 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 21 Feb 2018 11:46:09 -0800 Subject: [PATCH 04/19] Parametrized ExcelWriter subclasses --- pandas/tests/io/test_excel.py | 295 +++++++++++++++------------------- 1 file changed, 129 insertions(+), 166 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 5aca057b2db9b..386549d6f375e 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1030,32 +1030,23 @@ def test_read_excel_squeeze(self, ext): tm.assert_series_equal(actual, expected) -class ExcelWriterBase(SharedItems): +@pytest.mark.parametrize("merge_cells", [True, False]) +@pytest.mark.parametrize("engine,ext", [ + pytest.param('openpyxl', '.xlsx', marks=pytest.mark.skipif( + not td.safe_import('openpyxl'), reason='No openpyxl')), + pytest.param('xlwt', '.xls', marks=pytest.mark.skipif( + 
not td.safe_import('xlwt'), reason='No xlwt')), + pytest.param('xlsxwriter', '.xlsx', marks=pytest.mark.skipif( + not td.safe_import('xlsxwriter'), reason='No xlsxwriter')) +]) +class TestExcelWriter(SharedItems): # Base class for test cases to run with different Excel writers. - # To add a writer test, define the following: - # 1. A check_skip function that skips your tests if your writer isn't - # installed. - # 2. Add a property ext, which is the file extension that your writer - # writes to. (needs to start with '.' so it's a valid path) - # 3. Add a property engine_name, which is the name of the writer class. - - # Test with MultiIndex and Hierarchical Rows as merged cells. merge_cells = True - def setup_method(self, method): - self.check_skip() - super(ExcelWriterBase, self).setup_method(method) - self.option_name = 'io.excel.%s.writer' % self.ext.strip('.') - self.prev_engine = get_option(self.option_name) - set_option(self.option_name, self.engine_name) - - def teardown_method(self, method): - set_option(self.option_name, self.prev_engine) - - def test_excel_sheet_by_name_raise(self): + def test_excel_sheet_by_name_raise(self, merge_cells, engine, ext): import xlrd - with ensure_clean(self.ext) as pth: + with ensure_clean(ext) as pth: gt = DataFrame(np.random.randn(10, 2)) gt.to_excel(pth) xl = ExcelFile(pth) @@ -1065,8 +1056,8 @@ def test_excel_sheet_by_name_raise(self): with pytest.raises(xlrd.XLRDError): read_excel(xl, '0') - def test_excelwriter_contextmanager(self): - with ensure_clean(self.ext) as pth: + def test_excelwriter_contextmanager(self, merge_cells, engine, ext): + with ensure_clean(ext) as pth: with ExcelWriter(pth) as writer: self.frame.to_excel(writer, 'Data1') self.frame2.to_excel(writer, 'Data2') @@ -1077,8 +1068,8 @@ def test_excelwriter_contextmanager(self): tm.assert_frame_equal(found_df, self.frame) tm.assert_frame_equal(found_df2, self.frame2) - def test_roundtrip(self): - with ensure_clean(self.ext) as path: + def 
test_roundtrip(self, merge_cells, engine, ext): + with ensure_clean(ext) as path: self.frame['A'][:5] = nan self.frame.to_excel(path, 'test1') @@ -1125,34 +1116,34 @@ def test_roundtrip(self): recons = read_excel(path, index_col=0) tm.assert_frame_equal(s.to_frame(), recons) - def test_mixed(self): - with ensure_clean(self.ext) as path: + def test_mixed(self, merge_cells, engine, ext): + with ensure_clean(ext) as path: self.mixed_frame.to_excel(path, 'test1') reader = ExcelFile(path) recons = read_excel(reader, 'test1', index_col=0) tm.assert_frame_equal(self.mixed_frame, recons) - def test_tsframe(self): + def test_tsframe(self, merge_cells, engine, ext): df = tm.makeTimeDataFrame()[:5] - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: df.to_excel(path, 'test1') reader = ExcelFile(path) recons = read_excel(reader, 'test1') tm.assert_frame_equal(df, recons) - def test_basics_with_nan(self): - with ensure_clean(self.ext) as path: + def test_basics_with_nan(self, merge_cells, engine, ext): + with ensure_clean(ext) as path: self.frame['A'][:5] = nan self.frame.to_excel(path, 'test1') self.frame.to_excel(path, 'test1', columns=['A', 'B']) self.frame.to_excel(path, 'test1', header=False) self.frame.to_excel(path, 'test1', index=False) - def test_int_types(self): + def test_int_types(self, merge_cells, engine, ext): for np_type in (np.int8, np.int16, np.int32, np.int64): - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: # Test np.int values read come back as int (rather than float # which is Excel's format). frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)), @@ -1172,9 +1163,9 @@ def test_int_types(self): check_index_type=False, check_column_type=False) - def test_float_types(self): + def test_float_types(self, merge_cells, engine, ext): for np_type in (np.float16, np.float32, np.float64): - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: # Test np.float values read come back as float. 
frame = DataFrame(np.random.random_sample(10), dtype=np_type) frame.to_excel(path, 'test1') @@ -1182,9 +1173,9 @@ def test_float_types(self): recons = read_excel(reader, 'test1').astype(np_type) tm.assert_frame_equal(frame, recons, check_dtype=False) - def test_bool_types(self): + def test_bool_types(self, merge_cells, engine, ext): for np_type in (np.bool8, np.bool_): - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: # Test np.bool values read come back as float. frame = (DataFrame([1, 0, True, False], dtype=np_type)) frame.to_excel(path, 'test1') @@ -1192,16 +1183,16 @@ def test_bool_types(self): recons = read_excel(reader, 'test1').astype(np_type) tm.assert_frame_equal(frame, recons) - def test_inf_roundtrip(self): + def test_inf_roundtrip(self, merge_cells, engine, ext): frame = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: frame.to_excel(path, 'test1') reader = ExcelFile(path) recons = read_excel(reader, 'test1') tm.assert_frame_equal(frame, recons) - def test_sheets(self): - with ensure_clean(self.ext) as path: + def test_sheets(self, merge_cells, engine, ext): + with ensure_clean(ext) as path: self.frame['A'][:5] = nan self.frame.to_excel(path, 'test1') @@ -1223,8 +1214,8 @@ def test_sheets(self): assert 'test1' == reader.sheet_names[0] assert 'test2' == reader.sheet_names[1] - def test_colaliases(self): - with ensure_clean(self.ext) as path: + def test_colaliases(self, merge_cells, engine, ext): + with ensure_clean(ext) as path: self.frame['A'][:5] = nan self.frame.to_excel(path, 'test1') @@ -1241,8 +1232,8 @@ def test_colaliases(self): xp.columns = col_aliases tm.assert_frame_equal(xp, rs) - def test_roundtrip_indexlabels(self): - with ensure_clean(self.ext) as path: + def test_roundtrip_indexlabels(self, merge_cells, engine, ext): + with ensure_clean(ext) as path: self.frame['A'][:5] = nan @@ -1255,7 +1246,7 @@ def test_roundtrip_indexlabels(self): frame = 
(DataFrame(np.random.randn(10, 2)) >= 0) frame.to_excel(path, 'test1', index_label=['test'], - merge_cells=self.merge_cells) + merge_cells=merge_cells) reader = ExcelFile(path) recons = read_excel(reader, 'test1', index_col=0, @@ -1267,7 +1258,7 @@ def test_roundtrip_indexlabels(self): frame.to_excel(path, 'test1', index_label=['test', 'dummy', 'dummy2'], - merge_cells=self.merge_cells) + merge_cells=merge_cells) reader = ExcelFile(path) recons = read_excel(reader, 'test1', index_col=0, @@ -1279,7 +1270,7 @@ def test_roundtrip_indexlabels(self): frame.to_excel(path, 'test1', index_label='test', - merge_cells=self.merge_cells) + merge_cells=merge_cells) reader = ExcelFile(path) recons = read_excel(reader, 'test1', index_col=0, @@ -1287,12 +1278,12 @@ def test_roundtrip_indexlabels(self): frame.index.names = ['test'] tm.assert_frame_equal(frame, recons.astype(bool)) - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: self.frame.to_excel(path, 'test1', columns=['A', 'B', 'C', 'D'], - index=False, merge_cells=self.merge_cells) + index=False, merge_cells=merge_cells) # take 'A' and 'B' as indexes (same row as cols 'C', 'D') df = self.frame.copy() df = df.set_index(['A', 'B']) @@ -1301,12 +1292,12 @@ def test_roundtrip_indexlabels(self): recons = read_excel(reader, 'test1', index_col=[0, 1]) tm.assert_frame_equal(df, recons, check_less_precise=True) - def test_excel_roundtrip_indexname(self): + def test_excel_roundtrip_indexname(self, merge_cells, engine, ext): df = DataFrame(np.random.randn(10, 4)) df.index.name = 'foo' - with ensure_clean(self.ext) as path: - df.to_excel(path, merge_cells=self.merge_cells) + with ensure_clean(ext) as path: + df.to_excel(path, merge_cells=merge_cells) xf = ExcelFile(path) result = read_excel(xf, xf.sheet_names[0], @@ -1315,19 +1306,19 @@ def test_excel_roundtrip_indexname(self): tm.assert_frame_equal(result, df) assert result.index.name == 'foo' - def test_excel_roundtrip_datetime(self): + def 
test_excel_roundtrip_datetime(self, merge_cells, engine, ext): # datetime.date, not sure what to test here exactly tsf = self.tsframe.copy() - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: tsf.index = [x.date() for x in self.tsframe.index] - tsf.to_excel(path, 'test1', merge_cells=self.merge_cells) + tsf.to_excel(path, 'test1', merge_cells=merge_cells) reader = ExcelFile(path) recons = read_excel(reader, 'test1') tm.assert_frame_equal(self.tsframe, recons) # GH4133 - excel output format strings - def test_excel_date_datetime_format(self): + def test_excel_date_datetime_format(self, merge_cells, engine, ext): df = DataFrame([[date(2014, 1, 31), date(1999, 9, 24)], [datetime(1998, 5, 26, 23, 33, 4), @@ -1339,8 +1330,8 @@ def test_excel_date_datetime_format(self): datetime(2014, 2, 28, 13, 5, 13)]], index=['DATE', 'DATETIME'], columns=['X', 'Y']) - with ensure_clean(self.ext) as filename1: - with ensure_clean(self.ext) as filename2: + with ensure_clean(ext) as filename1: + with ensure_clean(ext) as filename2: writer1 = ExcelWriter(filename1) writer2 = ExcelWriter(filename2, date_format='DD.MM.YYYY', @@ -1364,9 +1355,9 @@ def test_excel_date_datetime_format(self): # to use df_expected to check the result tm.assert_frame_equal(rs2, df_expected) - def test_to_excel_interval_no_labels(self): + def test_to_excel_interval_no_labels(self, merge_cells, engine, ext): # GH19242 - test writing Interval without labels - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) expected = frame.copy() @@ -1377,9 +1368,9 @@ def test_to_excel_interval_no_labels(self): recons = read_excel(reader, 'test1') tm.assert_frame_equal(expected, recons) - def test_to_excel_interval_labels(self): + def test_to_excel_interval_labels(self, merge_cells, engine, ext): # GH19242 - test writing Interval with labels - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: 
frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) expected = frame.copy() @@ -1392,7 +1383,7 @@ def test_to_excel_interval_labels(self): recons = read_excel(reader, 'test1') tm.assert_frame_equal(expected, recons) - def test_to_excel_timedelta(self): + def test_to_excel_timedelta(self, merge_cells, engine, ext): # GH 19242, GH9155 - test writing timedelta to xls with ensure_clean('.xls') as path: frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), @@ -1408,50 +1399,50 @@ def test_to_excel_timedelta(self): recons = read_excel(reader, 'test1') tm.assert_frame_equal(expected, recons) - def test_to_excel_periodindex(self): + def test_to_excel_periodindex(self, merge_cells, engine, ext): frame = self.tsframe xp = frame.resample('M', kind='period').mean() - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: xp.to_excel(path, 'sht1') reader = ExcelFile(path) rs = read_excel(reader, 'sht1', index_col=0) tm.assert_frame_equal(xp, rs.to_period('M')) - def test_to_excel_multiindex(self): + def test_to_excel_multiindex(self, merge_cells, engine, ext): frame = self.frame arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) frame.index = new_index - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: frame.to_excel(path, 'test1', header=False) frame.to_excel(path, 'test1', columns=['A', 'B']) # round trip - frame.to_excel(path, 'test1', merge_cells=self.merge_cells) + frame.to_excel(path, 'test1', merge_cells=merge_cells) reader = ExcelFile(path) df = read_excel(reader, 'test1', index_col=[0, 1]) tm.assert_frame_equal(frame, df) # GH13511 - def test_to_excel_multiindex_nan_label(self): + def test_to_excel_multiindex_nan_label(self, merge_cells, engine, ext): frame = pd.DataFrame({'A': [None, 2, 3], 'B': [10, 20, 30], 'C': np.random.sample(3)}) frame = frame.set_index(['A', 'B']) - with ensure_clean(self.ext) as path: - 
frame.to_excel(path, merge_cells=self.merge_cells) + with ensure_clean(ext) as path: + frame.to_excel(path, merge_cells=merge_cells) df = read_excel(path, index_col=[0, 1]) tm.assert_frame_equal(frame, df) # Test for Issue 11328. If column indices are integers, make # sure they are handled correctly for either setting of # merge_cells - def test_to_excel_multiindex_cols(self): + def test_to_excel_multiindex_cols(self, merge_cells, engine, ext): frame = self.frame arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, @@ -1462,30 +1453,30 @@ def test_to_excel_multiindex_cols(self): (50, 1), (50, 2)]) frame.columns = new_cols_index header = [0, 1] - if not self.merge_cells: + if not merge_cells: header = 0 - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: # round trip - frame.to_excel(path, 'test1', merge_cells=self.merge_cells) + frame.to_excel(path, 'test1', merge_cells=merge_cells) reader = ExcelFile(path) df = read_excel(reader, 'test1', header=header, index_col=[0, 1]) - if not self.merge_cells: + if not merge_cells: fm = frame.columns.format(sparsify=False, adjoin=False, names=False) frame.columns = [".".join(map(str, q)) for q in zip(*fm)] tm.assert_frame_equal(frame, df) - def test_to_excel_multiindex_dates(self): + def test_to_excel_multiindex_dates(self, merge_cells, engine, ext): # try multiindex with dates tsframe = self.tsframe.copy() new_index = [tsframe.index, np.arange(len(tsframe.index))] tsframe.index = MultiIndex.from_arrays(new_index) - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: tsframe.index.names = ['time', 'foo'] - tsframe.to_excel(path, 'test1', merge_cells=self.merge_cells) + tsframe.to_excel(path, 'test1', merge_cells=merge_cells) reader = ExcelFile(path) recons = read_excel(reader, 'test1', index_col=[0, 1]) @@ -1493,7 +1484,7 @@ def test_to_excel_multiindex_dates(self): tm.assert_frame_equal(tsframe, recons) assert recons.index.names == ('time', 
'foo') - def test_to_excel_multiindex_no_write_index(self): + def test_to_excel_multiindex_no_write_index(self, merge_cells, engine, ext): # Test writing and re-reading a MI witout the index. GH 5616. # Initial non-MI frame. @@ -1504,7 +1495,7 @@ def test_to_excel_multiindex_no_write_index(self): multi_index = MultiIndex.from_tuples([(70, 80), (90, 100)]) frame2.index = multi_index - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: # Write out to Excel without the index. frame2.to_excel(path, 'test1', index=False) @@ -1516,12 +1507,12 @@ def test_to_excel_multiindex_no_write_index(self): # Test that it is the same as the initial frame. tm.assert_frame_equal(frame1, frame3) - def test_to_excel_float_format(self): + def test_to_excel_float_format(self, merge_cells, engine, ext): df = DataFrame([[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], index=['A', 'B'], columns=['X', 'Y', 'Z']) - with ensure_clean(self.ext) as filename: + with ensure_clean(ext) as filename: df.to_excel(filename, 'test1', float_format='%.2f') reader = ExcelFile(filename) @@ -1531,21 +1522,20 @@ def test_to_excel_float_format(self): index=['A', 'B'], columns=['X', 'Y', 'Z']) tm.assert_frame_equal(rs, xp) - def test_to_excel_output_encoding(self): + def test_to_excel_output_encoding(self, merge_cells, engine, ext): # avoid mixed inferred_type df = DataFrame([[u'\u0192', u'\u0193', u'\u0194'], [u'\u0195', u'\u0196', u'\u0197']], index=[u'A\u0192', u'B'], columns=[u'X\u0193', u'Y', u'Z']) - with ensure_clean('__tmp_to_excel_float_format__.' + self.ext)\ - as filename: + with ensure_clean('__tmp_to_excel_float_format__.' 
+ ext) as filename: df.to_excel(filename, sheet_name='TestSheet', encoding='utf8') result = read_excel(filename, 'TestSheet', encoding='utf8') tm.assert_frame_equal(result, df) - def test_to_excel_unicode_filename(self): - with ensure_clean(u('\u0192u.') + self.ext) as filename: + def test_to_excel_unicode_filename(self, merge_cells, engine, ext): + with ensure_clean(u('\u0192u.') + ext) as filename: try: f = open(filename, 'wb') except UnicodeEncodeError: @@ -1566,7 +1556,7 @@ def test_to_excel_unicode_filename(self): index=['A', 'B'], columns=['X', 'Y', 'Z']) tm.assert_frame_equal(rs, xp) - # def test_to_excel_header_styling_xls(self): + # def test_to_excel_header_styling_xls(self, merge_cells, engine, ext): # import StringIO # s = StringIO( @@ -1613,7 +1603,7 @@ def test_to_excel_unicode_filename(self): # assert 1 == cell_xf.border.left_line_style # assert 2 == cell_xf.alignment.hor_align # os.remove(filename) - # def test_to_excel_header_styling_xlsx(self): + # def test_to_excel_header_styling_xlsx(self, merge_cells, engine, ext): # import StringIO # s = StringIO( # """Date,ticker,type,value @@ -1666,8 +1656,8 @@ def test_to_excel_unicode_filename(self): # assert ws.cell(maddr).merged # os.remove(filename) - def test_excel_010_hemstring(self): - if self.merge_cells: + def test_excel_010_hemstring(self, merge_cells, engine, ext): + if merge_cells: pytest.skip('Skip tests for merged MI format.') from pandas.util.testing import makeCustomDataframe as mkdf @@ -1676,9 +1666,9 @@ def test_excel_010_hemstring(self): def roundtrip(df, header=True, parser_hdr=0, index=True): - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: df.to_excel(path, header=header, - merge_cells=self.merge_cells, index=index) + merge_cells=merge_cells, index=index) xf = ExcelFile(path) res = read_excel(xf, xf.sheet_names[0], header=parser_hdr) return res @@ -1717,10 +1707,10 @@ def roundtrip(df, header=True, parser_hdr=0, index=True): assert res.shape == (1, 2) assert 
res.iloc[0, 0] is not np.nan - def test_excel_010_hemstring_raises_NotImplementedError(self): + def test_excel_010_hemstring_raises_NotImplementedError(self, merge_cells, engine, ext): # This test was failing only for j>1 and header=False, # So I reproduced a simple test. - if self.merge_cells: + if merge_cells: pytest.skip('Skip tests for merged MI format.') from pandas.util.testing import makeCustomDataframe as mkdf @@ -1729,9 +1719,9 @@ def test_excel_010_hemstring_raises_NotImplementedError(self): def roundtrip2(df, header=True, parser_hdr=0, index=True): - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: df.to_excel(path, header=header, - merge_cells=self.merge_cells, index=index) + merge_cells=merge_cells, index=index) xf = ExcelFile(path) res = read_excel(xf, xf.sheet_names[0], header=parser_hdr) return res @@ -1744,9 +1734,9 @@ def roundtrip2(df, header=True, parser_hdr=0, index=True): with pytest.raises(NotImplementedError): roundtrip2(df, header=False, index=False) - def test_duplicated_columns(self): + def test_duplicated_columns(self, merge_cells, engine, ext): # Test for issue #5235 - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: write_frame = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]]) colnames = ['A', 'B', 'B'] @@ -1771,9 +1761,9 @@ def test_duplicated_columns(self): write_frame.columns = [0, 1, 2, 3] tm.assert_frame_equal(write_frame, read_frame) - def test_swapped_columns(self): + def test_swapped_columns(self, merge_cells, engine, ext): # Test for issue #5427. 
- with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: write_frame = DataFrame({'A': [1, 1, 1], 'B': [2, 2, 2]}) write_frame.to_excel(path, 'test1', columns=['B', 'A']) @@ -1783,9 +1773,9 @@ def test_swapped_columns(self): tm.assert_series_equal(write_frame['A'], read_frame['A']) tm.assert_series_equal(write_frame['B'], read_frame['B']) - def test_invalid_columns(self): + def test_invalid_columns(self, merge_cells, engine, ext): # 10982 - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: write_frame = DataFrame({'A': [1, 1, 1], 'B': [2, 2, 2]}) @@ -1799,10 +1789,10 @@ def test_invalid_columns(self): with pytest.raises(KeyError): write_frame.to_excel(path, 'test1', columns=['C', 'D']) - def test_comment_arg(self): + def test_comment_arg(self, merge_cells, engine, ext): # Re issue #18735 # Test the comment argument functionality to read_excel - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: # Create file to read in df = DataFrame({'A': ['one', '#one', 'one'], @@ -1817,10 +1807,10 @@ def test_comment_arg(self): result2 = read_excel(path, 'test_c', comment='#') tm.assert_frame_equal(result1, result2) - def test_comment_default(self): + def test_comment_default(self, merge_cells, engine, ext): # Re issue #18735 # Test the comment argument default to read_excel - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: # Create file to read in df = DataFrame({'A': ['one', '#one', 'one'], @@ -1832,10 +1822,10 @@ def test_comment_default(self): result2 = read_excel(path, 'test_c', comment=None) tm.assert_frame_equal(result1, result2) - def test_comment_used(self): + def test_comment_used(self, merge_cells, engine, ext): # Re issue #18735 # Test the comment argument is working as expected when used - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: # Create file to read in df = DataFrame({'A': ['one', '#one', 'one'], @@ -1848,10 +1838,10 @@ def test_comment_used(self): result 
= read_excel(path, 'test_c', comment='#') tm.assert_frame_equal(result, expected) - def test_comment_emptyline(self): + def test_comment_emptyline(self, merge_cells, engine, ext): # Re issue #18735 # Test that read_excel ignores commented lines at the end of file - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: df = DataFrame({'a': ['1', '#2'], 'b': ['2', '3']}) df.to_excel(path, index=False) @@ -1861,7 +1851,7 @@ def test_comment_emptyline(self): result = read_excel(path, comment='#') tm.assert_frame_equal(result, expected) - def test_datetimes(self): + def test_datetimes(self, merge_cells, engine, ext): # Test writing and reading datetimes. For issue #9139. (xref #9185) datetimes = [datetime(2013, 1, 13, 1, 2, 3), @@ -1876,7 +1866,7 @@ def test_datetimes(self): datetime(2013, 1, 13, 16, 37, 0), datetime(2013, 1, 13, 18, 20, 52)] - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: write_frame = DataFrame({'A': datetimes}) write_frame.to_excel(path, 'Sheet1') read_frame = read_excel(path, 'Sheet1', header=0) @@ -1884,11 +1874,11 @@ def test_datetimes(self): tm.assert_series_equal(write_frame['A'], read_frame['A']) # GH7074 - def test_bytes_io(self): + def test_bytes_io(self, merge_cells, engine, ext): bio = BytesIO() df = DataFrame(np.random.randn(10, 2)) # pass engine explicitly as there is no file path to infer from - writer = ExcelWriter(bio, engine=self.engine_name) + writer = ExcelWriter(bio, engine=engine) df.to_excel(writer) writer.save() bio.seek(0) @@ -1896,58 +1886,58 @@ def test_bytes_io(self): tm.assert_frame_equal(df, reread_df) # GH8188 - def test_write_lists_dict(self): + def test_write_lists_dict(self, merge_cells, engine, ext): df = DataFrame({'mixed': ['a', ['b', 'c'], {'d': 'e', 'f': 2}], 'numeric': [1, 2, 3.0], 'str': ['apple', 'banana', 'cherry']}) expected = df.copy() expected.mixed = expected.mixed.apply(str) expected.numeric = expected.numeric.astype('int64') - with ensure_clean(self.ext) as path: 
+ with ensure_clean(ext) as path: df.to_excel(path, 'Sheet1') read = read_excel(path, 'Sheet1', header=0) tm.assert_frame_equal(read, expected) # GH13347 - def test_true_and_false_value_options(self): + def test_true_and_false_value_options(self, merge_cells, engine, ext): df = pd.DataFrame([['foo', 'bar']], columns=['col1', 'col2']) expected = df.replace({'foo': True, 'bar': False}) - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: df.to_excel(path) read_frame = read_excel(path, true_values=['foo'], false_values=['bar']) tm.assert_frame_equal(read_frame, expected) - def test_freeze_panes(self): + def test_freeze_panes(self, merge_cells, engine, ext): # GH15160 expected = DataFrame([[1, 2], [3, 4]], columns=['col1', 'col2']) - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: expected.to_excel(path, "Sheet1", freeze_panes=(1, 1)) result = read_excel(path) tm.assert_frame_equal(expected, result) - def test_path_pathlib(self): + def test_path_pathlib(self, merge_cells, engine, ext): df = tm.makeDataFrame() - writer = partial(df.to_excel, engine=self.engine_name) + writer = partial(df.to_excel, engine=engine) reader = partial(pd.read_excel) result = tm.round_trip_pathlib(writer, reader, - path="foo.{}".format(self.ext)) + path="foo.{}".format(ext)) tm.assert_frame_equal(df, result) - def test_path_localpath(self): + def test_path_localpath(self, merge_cells, engine, ext): df = tm.makeDataFrame() - writer = partial(df.to_excel, engine=self.engine_name) + writer = partial(df.to_excel, engine=engine) reader = partial(pd.read_excel) result = tm.round_trip_pathlib(writer, reader, - path="foo.{}".format(self.ext)) + path="foo.{}".format(ext)) tm.assert_frame_equal(df, result) -class TestOpenpyxlTests(ExcelWriterBase): +@td.skip_if_no('openpyxl') +class TestOpenpyxlTests(SharedItems): engine_name = 'openpyxl' ext = '.xlsx' - check_skip = staticmethod(_skip_if_no_openpyxl) def test_to_excel_styleconverter(self): from openpyxl import 
styles @@ -2036,10 +2026,10 @@ def test_write_cells_merge_styled(self): assert xcell_a2.font == openpyxl_sty_merged -class TestXlwtTests(ExcelWriterBase): +@td.skip_if_no('xlwt') +class TestXlwtTests(SharedItems): ext = '.xls' engine_name = 'xlwt' - check_skip = staticmethod(_skip_if_no_xlwt) def test_excel_raise_error_on_multiindex_columns_and_no_index(self): _skip_if_no_xlwt() @@ -2049,7 +2039,7 @@ def test_excel_raise_error_on_multiindex_columns_and_no_index(self): ('2014', 'weight')]) df = DataFrame(np.random.randn(10, 3), columns=cols) with pytest.raises(NotImplementedError): - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: df.to_excel(path, index=False) def test_excel_multiindex_columns_and_index_true(self): @@ -2058,7 +2048,7 @@ def test_excel_multiindex_columns_and_index_true(self): ('2014', 'height'), ('2014', 'weight')]) df = pd.DataFrame(np.random.randn(10, 3), columns=cols) - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: df.to_excel(path, index=True) def test_excel_multiindex_index(self): @@ -2068,7 +2058,7 @@ def test_excel_multiindex_index(self): ('2014', 'height'), ('2014', 'weight')]) df = DataFrame(np.random.randn(3, 10), index=cols) - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: df.to_excel(path, index=False) def test_to_excel_styleconverter(self): @@ -2093,10 +2083,10 @@ def test_to_excel_styleconverter(self): assert xlwt.Alignment.VERT_TOP == xls_style.alignment.vert -class TestXlsxWriterTests(ExcelWriterBase): +@td.skip_if_no('xlsxwriter') +class TestXlsxWriterTests(SharedItems): ext = '.xlsx' engine_name = 'xlsxwriter' - check_skip = staticmethod(_skip_if_no_xlsxwriter) def test_column_format(self): # Test that column formats are applied to cells. Test for issue #9167. 
@@ -2109,7 +2099,7 @@ def test_column_format(self): _skip_if_no_openpyxl() import openpyxl - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: frame = DataFrame({'A': [123456, 123456], 'B': [123456, 123456]}) @@ -2146,33 +2136,6 @@ def test_column_format(self): assert read_num_format == num_format -class TestOpenpyxlTests_NoMerge(ExcelWriterBase): - ext = '.xlsx' - engine_name = 'openpyxl' - check_skip = staticmethod(_skip_if_no_openpyxl) - - # Test < 0.13 non-merge behaviour for MultiIndex and Hierarchical Rows. - merge_cells = False - - -class TestXlwtTests_NoMerge(ExcelWriterBase): - ext = '.xls' - engine_name = 'xlwt' - check_skip = staticmethod(_skip_if_no_xlwt) - - # Test < 0.13 non-merge behaviour for MultiIndex and Hierarchical Rows. - merge_cells = False - - -class TestXlsxWriterTests_NoMerge(ExcelWriterBase): - ext = '.xlsx' - engine_name = 'xlsxwriter' - check_skip = staticmethod(_skip_if_no_xlsxwriter) - - # Test < 0.13 non-merge behaviour for MultiIndex and Hierarchical Rows. 
- merge_cells = False - - class TestExcelWriterEngineTests(object): def test_ExcelWriter_dispatch(self): From 0f64f3a8975e4c53765e9f3c748fd121677fb880 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 21 Feb 2018 11:53:08 -0800 Subject: [PATCH 05/19] Finished up decorator cleanup for reads, linted --- pandas/tests/io/test_excel.py | 60 +++++++++++++++++------------------ 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 386549d6f375e..7896e30f5ef57 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -134,8 +134,8 @@ def test_usecols_int(self, ext): dfref = self.get_csv_refdf('test1') dfref = dfref.reindex(columns=['A', 'B', 'C']) df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols=3) - df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, - usecols=3) + df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], + index_col=0, usecols=3) with tm.assert_produces_warning(FutureWarning): df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], @@ -152,8 +152,8 @@ def test_usecols_list(self, ext): dfref = dfref.reindex(columns=['B', 'C']) df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols=[0, 2, 3]) - df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, - usecols=[0, 2, 3]) + df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], + index_col=0, usecols=[0, 2, 3]) with tm.assert_produces_warning(FutureWarning): df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], @@ -171,8 +171,8 @@ def test_usecols_str(self, ext): df1 = dfref.reindex(columns=['A', 'B', 'C']) df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols='A:D') - df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, - usecols='A:D') + df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], + index_col=0, usecols='A:D') with tm.assert_produces_warning(FutureWarning): df4 = 
self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], @@ -277,7 +277,8 @@ def test_excel_table(self, ext): dfref = self.get_csv_refdf('test1') df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0) - df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0) + df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], + index_col=0) # TODO add index to file tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) @@ -350,7 +351,8 @@ def test_reader_converters(self, ext): # should read in correctly and set types of single cells (not array # dtypes) - actual = self.get_exceldf(basename, ext, 'Sheet1', converters=converters) + actual = self.get_exceldf(basename, ext, 'Sheet1', + converters=converters) tm.assert_frame_equal(actual, expected) def test_reader_dtype(self, ext): @@ -426,11 +428,10 @@ def test_read_excel_blank_with_header(self, ext): actual = self.get_exceldf('blank_with_header', ext, 'Sheet1') tm.assert_frame_equal(actual, expected) + @td.skip_if_no('openpyxl') + @td.skip_if_no('xlwt') # GH 12292 : error when read one empty column from excel file def test_read_one_empty_col_no_header(self, ext): - _skip_if_no_xlwt() - _skip_if_no_openpyxl() - df = pd.DataFrame( [["", 1, 100], ["", 2, 200], @@ -456,10 +457,9 @@ def test_read_one_empty_col_no_header(self, ext): tm.assert_frame_equal(actual_header_none, expected) tm.assert_frame_equal(actual_header_zero, expected) + @td.skip_if_no('openpyxl') + @td.skip_if_no('xlwt') def test_read_one_empty_col_with_header(self, ext): - _skip_if_no_xlwt() - _skip_if_no_openpyxl() - df = pd.DataFrame( [["", 1, 100], ["", 2, 200], @@ -486,10 +486,9 @@ def test_read_one_empty_col_with_header(self, ext): expected_header_zero = DataFrame(columns=[0], dtype='int64') tm.assert_frame_equal(actual_header_zero, expected_header_zero) + @td.skip_if_no('openpyxl') + @td.skip_if_no('xlwt') def test_set_column_names_in_parameter(self, ext): - _skip_if_no_xlwt() - 
_skip_if_no_openpyxl() - # GH 12870 : pass down column names associated with # keyword argument names refdf = pd.DataFrame([[1, 'foo'], [2, 'bar'], @@ -529,14 +528,16 @@ def test_sheet_name_and_sheetname(self, ext): dfref = self.get_csv_refdf('test1') df1 = self.get_exceldf('test1', ext, sheet_name='Sheet1') # doc with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - df2 = self.get_exceldf('test1', ext, sheetname='Sheet1') # bkwrd compat + df2 = self.get_exceldf('test1', ext, + sheetname='Sheet1') # bkwrd compat tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) def test_sheet_name_both_raises(self, ext): with tm.assert_raises_regex(TypeError, "Cannot specify both"): - self.get_exceldf('test1', ext, sheetname='Sheet1', sheet_name='Sheet1') + self.get_exceldf('test1', ext, sheetname='Sheet1', + sheet_name='Sheet1') @pytest.mark.parametrize("ext", ['.xls', '.xlsx', '.xlsm']) @@ -559,9 +560,8 @@ def test_excel_read_buffer(self, ext): actual = read_excel(xls, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) + @td.skip_if_no('xlwt') def test_read_xlrd_Book(self, ext): - _skip_if_no_xlwt() - df = self.frame with ensure_clean('.xls') as pth: df.to_excel(pth, "SheetA") @@ -659,14 +659,12 @@ def test_reader_closes_file(self, ext): assert f.closed + @td.skip_if_no('openpyxl') + @td.skip_if_no('xlwt') def test_creating_and_reading_multiple_sheets(self, ext): # Test reading multiple sheets, from a runtime created excel file # with multiple sheets. 
# See PR #9450 - - _skip_if_no_xlwt() - _skip_if_no_openpyxl() - def tdf(sheetname): d, i = [11, 22, 33], [1, 2, 3] return DataFrame(d, i, columns=[sheetname]) @@ -778,9 +776,9 @@ def test_read_excel_multiindex(self, ext): header=[0, 1], skiprows=2) tm.assert_frame_equal(actual, expected) + @td.skip_if_no('xlsxwriter') def test_read_excel_multiindex_empty_level(self, ext): # GH 12453 - _skip_if_no_xlsxwriter() with ensure_clean('.xlsx') as path: df = DataFrame({ ('Zero', ''): {0: 0}, @@ -818,9 +816,9 @@ def test_read_excel_multiindex_empty_level(self, ext): actual = pd.read_excel(path, header=[0, 1]) tm.assert_frame_equal(actual, expected) + @td.skip_if_no('xlsxwriter') def test_excel_multindex_roundtrip(self, ext): # GH 4679 - _skip_if_no_xlsxwriter() with ensure_clean('.xlsx') as pth: for c_idx_names in [True, False]: for r_idx_names in [True, False]: @@ -937,11 +935,10 @@ def test_read_excel_chunksize(self, ext): pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), chunksize=100) + @td.skip_if_no('openpyxl') + @td.skip_if_no('xlwt') def test_read_excel_parse_dates(self, ext): # GH 11544, 12051 - _skip_if_no_openpyxl() - _skip_if_no_xlwt() # for df2.to_excel - df = DataFrame( {'col': [1, 2, 3], 'date_strings': pd.date_range('2012-01-01', periods=3)}) @@ -1707,7 +1704,8 @@ def roundtrip(df, header=True, parser_hdr=0, index=True): assert res.shape == (1, 2) assert res.iloc[0, 0] is not np.nan - def test_excel_010_hemstring_raises_NotImplementedError(self, merge_cells, engine, ext): + def test_excel_010_hemstring_raises_NotImplementedError(self, merge_cells, + engine,ext): # This test was failing only for j>1 and header=False, # So I reproduced a simple test. 
if merge_cells: From ed761efb2a27cf19e4dc0d17d896f8b5ee55a6b0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 21 Feb 2018 12:03:28 -0800 Subject: [PATCH 06/19] Fixed issue with xlwt install --- pandas/tests/io/test_excel.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 7896e30f5ef57..e0bc9a7979d12 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -562,6 +562,8 @@ def test_excel_read_buffer(self, ext): @td.skip_if_no('xlwt') def test_read_xlrd_Book(self, ext): + import xlrd + df = self.frame with ensure_clean('.xls') as pth: df.to_excel(pth, "SheetA") @@ -2025,11 +2027,10 @@ def test_write_cells_merge_styled(self): @td.skip_if_no('xlwt') +@pytest.mark.parametrize("ext", ['.xls']) class TestXlwtTests(SharedItems): - ext = '.xls' - engine_name = 'xlwt' - def test_excel_raise_error_on_multiindex_columns_and_no_index(self): + def test_excel_raise_error_on_multiindex_columns_and_no_index(self, ext): _skip_if_no_xlwt() # MultiIndex as columns is not yet implemented 9794 cols = MultiIndex.from_tuples([('site', ''), @@ -2040,7 +2041,7 @@ def test_excel_raise_error_on_multiindex_columns_and_no_index(self): with ensure_clean(ext) as path: df.to_excel(path, index=False) - def test_excel_multiindex_columns_and_index_true(self): + def test_excel_multiindex_columns_and_index_true(self, ext): _skip_if_no_xlwt() cols = MultiIndex.from_tuples([('site', ''), ('2014', 'height'), @@ -2049,7 +2050,7 @@ def test_excel_multiindex_columns_and_index_true(self): with ensure_clean(ext) as path: df.to_excel(path, index=True) - def test_excel_multiindex_index(self): + def test_excel_multiindex_index(self, ext): _skip_if_no_xlwt() # MultiIndex as index works so assert no error #9794 cols = MultiIndex.from_tuples([('site', ''), @@ -2059,7 +2060,7 @@ def test_excel_multiindex_index(self): with ensure_clean(ext) as path: df.to_excel(path, index=False) - def 
test_to_excel_styleconverter(self): + def test_to_excel_styleconverter(self, ext): _skip_if_no_xlwt() import xlwt From 2dff85d435eebeb8be47d840f70b4d155803662c Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 21 Feb 2018 12:07:30 -0800 Subject: [PATCH 07/19] Cleaned up all Test{} writer tests --- pandas/tests/io/test_excel.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index e0bc9a7979d12..4c8383ade048d 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1935,11 +1935,10 @@ def test_path_localpath(self, merge_cells, engine, ext): @td.skip_if_no('openpyxl') +@pytest.mark.parametrize("ext", ['.xlsx']) class TestOpenpyxlTests(SharedItems): - engine_name = 'openpyxl' - ext = '.xlsx' - def test_to_excel_styleconverter(self): + def test_to_excel_styleconverter(self, ext): from openpyxl import styles hstyle = { @@ -1993,7 +1992,7 @@ def test_to_excel_styleconverter(self): assert kw['number_format'] == number_format assert kw['protection'] == protection - def test_write_cells_merge_styled(self): + def test_write_cells_merge_styled(self, ext): from pandas.io.formats.excel import ExcelCell sheet_name = 'merge_styled' @@ -2014,7 +2013,7 @@ def test_write_cells_merge_styled(self): mergestart=1, mergeend=1, style=sty_merged), ] - with ensure_clean('.xlsx') as path: + with ensure_clean(ext) as path: writer = _OpenpyxlWriter(path) writer.write_cells(initial_cells, sheet_name=sheet_name) writer.write_cells(merge_cells, sheet_name=sheet_name) @@ -2031,7 +2030,6 @@ def test_write_cells_merge_styled(self): class TestXlwtTests(SharedItems): def test_excel_raise_error_on_multiindex_columns_and_no_index(self, ext): - _skip_if_no_xlwt() # MultiIndex as columns is not yet implemented 9794 cols = MultiIndex.from_tuples([('site', ''), ('2014', 'height'), @@ -2042,7 +2040,6 @@ def test_excel_raise_error_on_multiindex_columns_and_no_index(self, ext): 
df.to_excel(path, index=False) def test_excel_multiindex_columns_and_index_true(self, ext): - _skip_if_no_xlwt() cols = MultiIndex.from_tuples([('site', ''), ('2014', 'height'), ('2014', 'weight')]) @@ -2051,7 +2048,6 @@ def test_excel_multiindex_columns_and_index_true(self, ext): df.to_excel(path, index=True) def test_excel_multiindex_index(self, ext): - _skip_if_no_xlwt() # MultiIndex as index works so assert no error #9794 cols = MultiIndex.from_tuples([('site', ''), ('2014', 'height'), @@ -2061,8 +2057,6 @@ def test_excel_multiindex_index(self, ext): df.to_excel(path, index=False) def test_to_excel_styleconverter(self, ext): - _skip_if_no_xlwt() - import xlwt hstyle = {"font": {"bold": True}, @@ -2083,15 +2077,12 @@ def test_to_excel_styleconverter(self, ext): @td.skip_if_no('xlsxwriter') +@pytest.mark.parametrize("ext", ['.xlsx']) class TestXlsxWriterTests(SharedItems): - ext = '.xlsx' - engine_name = 'xlsxwriter' - def test_column_format(self): + def test_column_format(self, ext): # Test that column formats are applied to cells. Test for issue #9167. # Applicable to xlsxwriter only. - _skip_if_no_xlsxwriter() - with warnings.catch_warnings(): # Ignore the openpyxl lxml warning. 
warnings.simplefilter("ignore") From 19768042cddbf7c8c2ee52efc6c94b9b7f136b8d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 21 Feb 2018 12:52:39 -0800 Subject: [PATCH 08/19] Converted Writer setup/teardown to pytest fixture --- pandas/tests/io/test_excel.py | 46 +++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 4c8383ade048d..7cf846a15cb95 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1029,6 +1029,18 @@ def test_read_excel_squeeze(self, ext): tm.assert_series_equal(actual, expected) +class _WriterBase(SharedItems): + """Provides fixture to set / reset options for all writer tests""" + + @pytest.fixture(autouse=True) + def set_options(self, request, merge_cells, engine, ext): + option_name = 'io.excel.{ext}.writer'.format(ext=ext.strip('.')) + prev_engine = get_option(option_name) + set_option(option_name, engine) + yield + set_option(option_name, prev_engine) + + @pytest.mark.parametrize("merge_cells", [True, False]) @pytest.mark.parametrize("engine,ext", [ pytest.param('openpyxl', '.xlsx', marks=pytest.mark.skipif( @@ -1038,9 +1050,8 @@ def test_read_excel_squeeze(self, ext): pytest.param('xlsxwriter', '.xlsx', marks=pytest.mark.skipif( not td.safe_import('xlsxwriter'), reason='No xlsxwriter')) ]) -class TestExcelWriter(SharedItems): +class TestExcelWriter(_WriterBase): # Base class for test cases to run with different Excel writers. 
- merge_cells = True def test_excel_sheet_by_name_raise(self, merge_cells, engine, ext): import xlrd @@ -1935,10 +1946,11 @@ def test_path_localpath(self, merge_cells, engine, ext): @td.skip_if_no('openpyxl') -@pytest.mark.parametrize("ext", ['.xlsx']) -class TestOpenpyxlTests(SharedItems): +@pytest.mark.parametrize("merge_cells,ext,engine", [ + (None, '.xlsx', 'openpyxl')]) +class TestOpenpyxlTests(_WriterBase): - def test_to_excel_styleconverter(self, ext): + def test_to_excel_styleconverter(self, merge_cells, ext, engine): from openpyxl import styles hstyle = { @@ -1992,7 +2004,7 @@ def test_to_excel_styleconverter(self, ext): assert kw['number_format'] == number_format assert kw['protection'] == protection - def test_write_cells_merge_styled(self, ext): + def test_write_cells_merge_styled(self, merge_cells, ext, engine): from pandas.io.formats.excel import ExcelCell sheet_name = 'merge_styled' @@ -2026,10 +2038,12 @@ def test_write_cells_merge_styled(self, ext): @td.skip_if_no('xlwt') -@pytest.mark.parametrize("ext", ['.xls']) -class TestXlwtTests(SharedItems): +@pytest.mark.parametrize("merge_cells,ext,engine", [ + (None, '.xls', 'xlwt')]) +class TestXlwtTests(_WriterBase): - def test_excel_raise_error_on_multiindex_columns_and_no_index(self, ext): + def test_excel_raise_error_on_multiindex_columns_and_no_index( + self, merge_cells, ext, engine): # MultiIndex as columns is not yet implemented 9794 cols = MultiIndex.from_tuples([('site', ''), ('2014', 'height'), @@ -2039,7 +2053,8 @@ def test_excel_raise_error_on_multiindex_columns_and_no_index(self, ext): with ensure_clean(ext) as path: df.to_excel(path, index=False) - def test_excel_multiindex_columns_and_index_true(self, ext): + def test_excel_multiindex_columns_and_index_true(self, merge_cells, ext, + engine): cols = MultiIndex.from_tuples([('site', ''), ('2014', 'height'), ('2014', 'weight')]) @@ -2047,7 +2062,7 @@ def test_excel_multiindex_columns_and_index_true(self, ext): with ensure_clean(ext) as path: 
df.to_excel(path, index=True) - def test_excel_multiindex_index(self, ext): + def test_excel_multiindex_index(self, merge_cells, ext, engine): # MultiIndex as index works so assert no error #9794 cols = MultiIndex.from_tuples([('site', ''), ('2014', 'height'), @@ -2056,7 +2071,7 @@ def test_excel_multiindex_index(self, ext): with ensure_clean(ext) as path: df.to_excel(path, index=False) - def test_to_excel_styleconverter(self, ext): + def test_to_excel_styleconverter(self, merge_cells, ext, engine): import xlwt hstyle = {"font": {"bold": True}, @@ -2077,10 +2092,11 @@ def test_to_excel_styleconverter(self, ext): @td.skip_if_no('xlsxwriter') -@pytest.mark.parametrize("ext", ['.xlsx']) -class TestXlsxWriterTests(SharedItems): +@pytest.mark.parametrize("merge_cells,ext,engine", [ + (None, '.xlsx', 'xlsxwriter')]) +class TestXlsxWriterTests(_WriterBase): - def test_column_format(self, ext): + def test_column_format(self, merge_cells, ext, engine): # Test that column formats are applied to cells. Test for issue #9167. # Applicable to xlsxwriter only. 
with warnings.catch_warnings(): From d6bb4aad43ac81183f3484b7a2e5517e155c8d27 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 21 Feb 2018 13:17:59 -0800 Subject: [PATCH 09/19] Final refactor to remove _skip_if_no module funcs --- pandas/tests/io/test_excel.py | 61 ++++++++++++----------------------- 1 file changed, 20 insertions(+), 41 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 7cf846a15cb95..a678780ec04bd 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -28,27 +28,6 @@ from pandas.util.testing import ensure_clean, makeCustomDataframe as mkdf -def _skip_if_no_xlwt(): - try: - import xlwt # NOQA - except ImportError: - pytest.skip('xlwt not installed, skipping') - - -def _skip_if_no_openpyxl(): - try: - import openpyxl # NOQA - except ImportError: - pytest.skip('openpyxl not installed, skipping') - - -def _skip_if_no_xlsxwriter(): - try: - import xlsxwriter # NOQA - except ImportError: - pytest.skip('xlsxwriter not installed, skipping') - - _seriesd = tm.getSeriesData() _tsd = tm.getTimeSeriesData() _frame = DataFrame(_seriesd)[:10] @@ -2096,13 +2075,13 @@ def test_to_excel_styleconverter(self, merge_cells, ext, engine): (None, '.xlsx', 'xlsxwriter')]) class TestXlsxWriterTests(_WriterBase): + @td.skip_if_no('openpyxl') def test_column_format(self, merge_cells, ext, engine): # Test that column formats are applied to cells. Test for issue #9167. # Applicable to xlsxwriter only. with warnings.catch_warnings(): # Ignore the openpyxl lxml warning. 
warnings.simplefilter("ignore") - _skip_if_no_openpyxl() import openpyxl with ensure_clean(ext) as path: @@ -2144,25 +2123,26 @@ def test_column_format(self, merge_cells, ext, engine): class TestExcelWriterEngineTests(object): - def test_ExcelWriter_dispatch(self): - with tm.assert_raises_regex(ValueError, 'No engine'): - ExcelWriter('nothing') - - try: - import xlsxwriter # noqa - writer_klass = _XlsxWriter - except ImportError: - _skip_if_no_openpyxl() - writer_klass = _OpenpyxlWriter - - with ensure_clean('.xlsx') as path: + @pytest.mark.parametrize('klass,ext', [ + pytest.param(_XlsxWriter, '.xlsx', marks=pytest.mark.skipif( + not td.safe_import('xlsxwriter'), reason='No xlsxwriter')), + pytest.param(_OpenpyxlWriter, '.xlsx', marks=pytest.mark.skipif( + not td.safe_import('openpyxl'), reason='No openpyxl')), + pytest.param(_XlwtWriter, '.xls', marks=pytest.mark.skipif( + not td.safe_import('xlwt'), reason='No xlwt')) + ]) + def test_ExcelWriter_dispatch(self, klass, ext): + with ensure_clean(ext) as path: writer = ExcelWriter(path) - assert isinstance(writer, writer_klass) + if ext == '.xlsx' and td.safe_import('xlsxwriter'): + # xlsxwriter has preference over openpyxl if both installed + assert isinstance(writer, _XlsxWriter) + else: + assert isinstance(writer, klass) - _skip_if_no_xlwt() - with ensure_clean('.xls') as path: - writer = ExcelWriter(path) - assert isinstance(writer, _XlwtWriter) + def test_ExcelWriter_dispatch_raises(self): + with tm.assert_raises_regex(ValueError, 'No engine'): + ExcelWriter('nothing') def test_register_writer(self): # some awkward mocking to test out dispatch and such actually works @@ -2353,11 +2333,11 @@ def custom_converter(css): assert n_cells == (10 + 1) * (3 + 1) +@td.skip_if_no('openpyxl') class TestFSPath(object): @pytest.mark.skipif(sys.version_info < (3, 6), reason='requires fspath') def test_excelfile_fspath(self): - _skip_if_no_openpyxl() with tm.ensure_clean('foo.xlsx') as path: df = DataFrame({"A": [1, 2]}) 
df.to_excel(path) @@ -2368,7 +2348,6 @@ def test_excelfile_fspath(self): @pytest.mark.skipif(sys.version_info < (3, 6), reason='requires fspath') # @pytest.mark.xfail def test_excelwriter_fspath(self): - _skip_if_no_openpyxl() with tm.ensure_clean('foo.xlsx') as path: writer = ExcelWriter(path) assert os.fspath(writer) == str(path) From d885b93e092c377b64a7ced8a121de66fe2cacf8 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 21 Feb 2018 13:28:30 -0800 Subject: [PATCH 10/19] LINTing --- pandas/tests/io/test_excel.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index a678780ec04bd..f2741fc183617 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -165,8 +165,8 @@ def test_usecols_str(self, ext): df1 = dfref.reindex(columns=['B', 'C']) df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols='A,C,D') - df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, - usecols='A,C,D') + df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], + index_col=0, usecols='A,C,D') # TODO add index to xls file tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) @@ -174,8 +174,8 @@ def test_usecols_str(self, ext): df1 = dfref.reindex(columns=['B', 'C']) df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols='A,C:D') - df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, - usecols='A,C:D') + df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], + index_col=0, usecols='A,C:D') tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) @@ -1473,7 +1473,8 @@ def test_to_excel_multiindex_dates(self, merge_cells, engine, ext): tm.assert_frame_equal(tsframe, recons) assert recons.index.names == ('time', 'foo') - def test_to_excel_multiindex_no_write_index(self, merge_cells, engine, ext): + def 
test_to_excel_multiindex_no_write_index(self, merge_cells, engine, + ext): # Test writing and re-reading a MI witout the index. GH 5616. # Initial non-MI frame. @@ -1697,7 +1698,7 @@ def roundtrip(df, header=True, parser_hdr=0, index=True): assert res.iloc[0, 0] is not np.nan def test_excel_010_hemstring_raises_NotImplementedError(self, merge_cells, - engine,ext): + engine, ext): # This test was failing only for j>1 and header=False, # So I reproduced a simple test. if merge_cells: From a681e5ec780709845c10c29bce034a1381d6bd22 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 22 Feb 2018 08:37:11 -0800 Subject: [PATCH 11/19] Added wraps to compat for Py27 testing --- pandas/compat/__init__.py | 12 ++++++++++++ pandas/util/testing.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 80a2c05d86971..8ff6559c36e20 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -365,6 +365,18 @@ def callable(obj): return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) +if sys.version_info[0:2] < (3, 4): + def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS, + updated=functools.WRAPPER_UPDATES): + def wrapper(f): + f = functools.wraps(wrapped, assigned, updated)(f) + f.__wrapped__ = wrapped + return f + return wrapper +else: + wraps = functools.wraps + + def add_metaclass(metaclass): """Class decorator for creating a class with a metaclass.""" def wrapper(cls): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 0009e26f8b100..942416408e4f0 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2165,7 +2165,7 @@ def network(t, url="http://www.google.com", from pytest import skip t.network = True - @wraps(t) + @compat.wraps(t) def wrapper(*args, **kwargs): if check_before_test and not raise_on_error: if not can_connect(url, error_classes): From 8035620eb5a1fad28850271facb9801feed59664 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: 
Thu, 22 Feb 2018 18:37:36 -0800 Subject: [PATCH 12/19] Comments for compat.wraps requirement --- pandas/compat/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 8ff6559c36e20..78aaf4596c8b7 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -365,7 +365,9 @@ def callable(obj): return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) -if sys.version_info[0:2] < (3, 4): +if sys.version_info[0] < 3: + # In PY2 functools.wraps doesn't provide metadata pytest needs to generate + # decorated tests using parametrization. See pytest GH issue #2782 def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS, updated=functools.WRAPPER_UPDATES): def wrapper(f): From 0cfdaf74c1bfe5f6708c7ce2de3754ed04d0ccb7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 24 Feb 2018 16:20:14 -0800 Subject: [PATCH 13/19] Removed hard-coded ensure_clean --- pandas/tests/io/test_excel.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index f2741fc183617..68f24928a7c67 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1374,7 +1374,9 @@ def test_to_excel_interval_labels(self, merge_cells, engine, ext): def test_to_excel_timedelta(self, merge_cells, engine, ext): # GH 19242, GH9155 - test writing timedelta to xls - with ensure_clean('.xls') as path: + if engine == 'openpyxl': + pytest.skip('Timedelta roundtrip broken with openpyxl') + with ensure_clean(ext) as path: frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), columns=['A'], dtype=np.int64 From 96708becd087171a469f7258af7fe4465339e266 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 24 Feb 2018 16:27:14 -0800 Subject: [PATCH 14/19] Futher parametrization --- pandas/tests/io/test_excel.py | 83 ++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 41 deletions(-) diff --git 
a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 68f24928a7c67..15d94d83f145d 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1129,48 +1129,49 @@ def test_basics_with_nan(self, merge_cells, engine, ext): self.frame.to_excel(path, 'test1', header=False) self.frame.to_excel(path, 'test1', index=False) - def test_int_types(self, merge_cells, engine, ext): - for np_type in (np.int8, np.int16, np.int32, np.int64): + @pytest.mark.parametrize("np_type", [ + np.int8, np.int16, np.int32, np.int64]) + def test_int_types(self, merge_cells, engine, ext, np_type): + with ensure_clean(ext) as path: + # Test np.int values read come back as int (rather than float + # which is Excel's format). + frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)), + dtype=np_type) + frame.to_excel(path, 'test1') + reader = ExcelFile(path) + recons = read_excel(reader, 'test1') + int_frame = frame.astype(np.int64) + tm.assert_frame_equal(int_frame, recons) + recons2 = read_excel(path, 'test1') + tm.assert_frame_equal(int_frame, recons2) + + # test with convert_float=False comes back as float + float_frame = frame.astype(float) + recons = read_excel(path, 'test1', convert_float=False) + tm.assert_frame_equal(recons, float_frame, + check_index_type=False, + check_column_type=False) + + @pytest.mark.parametrize("np_type", [ + np.float16, np.float32, np.float64]) + def test_float_types(self, merge_cells, engine, ext, np_type): + with ensure_clean(ext) as path: + # Test np.float values read come back as float. + frame = DataFrame(np.random.random_sample(10), dtype=np_type) + frame.to_excel(path, 'test1') + reader = ExcelFile(path) + recons = read_excel(reader, 'test1').astype(np_type) + tm.assert_frame_equal(frame, recons, check_dtype=False) - with ensure_clean(ext) as path: - # Test np.int values read come back as int (rather than float - # which is Excel's format). 
- frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)), - dtype=np_type) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - int_frame = frame.astype(np.int64) - tm.assert_frame_equal(int_frame, recons) - recons2 = read_excel(path, 'test1') - tm.assert_frame_equal(int_frame, recons2) - - # test with convert_float=False comes back as float - float_frame = frame.astype(float) - recons = read_excel(path, 'test1', convert_float=False) - tm.assert_frame_equal(recons, float_frame, - check_index_type=False, - check_column_type=False) - - def test_float_types(self, merge_cells, engine, ext): - for np_type in (np.float16, np.float32, np.float64): - with ensure_clean(ext) as path: - # Test np.float values read come back as float. - frame = DataFrame(np.random.random_sample(10), dtype=np_type) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1').astype(np_type) - tm.assert_frame_equal(frame, recons, check_dtype=False) - - def test_bool_types(self, merge_cells, engine, ext): - for np_type in (np.bool8, np.bool_): - with ensure_clean(ext) as path: - # Test np.bool values read come back as float. - frame = (DataFrame([1, 0, True, False], dtype=np_type)) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1').astype(np_type) - tm.assert_frame_equal(frame, recons) + @pytest.mark.parametrize("np_type", [np.bool8, np.bool_]) + def test_bool_types(self, merge_cells, engine, ext, np_type): + with ensure_clean(ext) as path: + # Test np.bool values read come back as float. 
+ frame = (DataFrame([1, 0, True, False], dtype=np_type)) + frame.to_excel(path, 'test1') + reader = ExcelFile(path) + recons = read_excel(reader, 'test1').astype(np_type) + tm.assert_frame_equal(frame, recons) def test_inf_roundtrip(self, merge_cells, engine, ext): frame = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) From 9b16035b1ddd902e9ff90637ca00c2c7c7db0792 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 24 Feb 2018 16:42:55 -0800 Subject: [PATCH 15/19] Moved ensure_clean to class fixture --- pandas/tests/io/test_excel.py | 841 ++++++++++++++++------------------ 1 file changed, 401 insertions(+), 440 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 15d94d83f145d..f7a921ed62f56 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1016,7 +1016,9 @@ def set_options(self, request, merge_cells, engine, ext): option_name = 'io.excel.{ext}.writer'.format(ext=ext.strip('.')) prev_engine = get_option(option_name) set_option(option_name, engine) - yield + with ensure_clean(ext) as path: + self.path = path + yield set_option(option_name, prev_engine) @@ -1035,277 +1037,259 @@ class TestExcelWriter(_WriterBase): def test_excel_sheet_by_name_raise(self, merge_cells, engine, ext): import xlrd - with ensure_clean(ext) as pth: - gt = DataFrame(np.random.randn(10, 2)) - gt.to_excel(pth) - xl = ExcelFile(pth) - df = read_excel(xl, 0) - tm.assert_frame_equal(gt, df) + gt = DataFrame(np.random.randn(10, 2)) + gt.to_excel(self.path) + xl = ExcelFile(self.path) + df = read_excel(xl, 0) + tm.assert_frame_equal(gt, df) - with pytest.raises(xlrd.XLRDError): - read_excel(xl, '0') + with pytest.raises(xlrd.XLRDError): + read_excel(xl, '0') def test_excelwriter_contextmanager(self, merge_cells, engine, ext): - with ensure_clean(ext) as pth: - with ExcelWriter(pth) as writer: - self.frame.to_excel(writer, 'Data1') - self.frame2.to_excel(writer, 'Data2') + with ExcelWriter(self.path) as writer: + 
self.frame.to_excel(writer, 'Data1') + self.frame2.to_excel(writer, 'Data2') - with ExcelFile(pth) as reader: - found_df = read_excel(reader, 'Data1') - found_df2 = read_excel(reader, 'Data2') - tm.assert_frame_equal(found_df, self.frame) - tm.assert_frame_equal(found_df2, self.frame2) + with ExcelFile(self.path) as reader: + found_df = read_excel(reader, 'Data1') + found_df2 = read_excel(reader, 'Data2') + tm.assert_frame_equal(found_df, self.frame) + tm.assert_frame_equal(found_df2, self.frame2) def test_roundtrip(self, merge_cells, engine, ext): - with ensure_clean(ext) as path: - self.frame['A'][:5] = nan - - self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', columns=['A', 'B']) - self.frame.to_excel(path, 'test1', header=False) - self.frame.to_excel(path, 'test1', index=False) - - # test roundtrip - self.frame.to_excel(path, 'test1') - recons = read_excel(path, 'test1', index_col=0) - tm.assert_frame_equal(self.frame, recons) - - self.frame.to_excel(path, 'test1', index=False) - recons = read_excel(path, 'test1', index_col=None) - recons.index = self.frame.index - tm.assert_frame_equal(self.frame, recons) - - self.frame.to_excel(path, 'test1', na_rep='NA') - recons = read_excel(path, 'test1', index_col=0, na_values=['NA']) - tm.assert_frame_equal(self.frame, recons) - - # GH 3611 - self.frame.to_excel(path, 'test1', na_rep='88') - recons = read_excel(path, 'test1', index_col=0, na_values=['88']) - tm.assert_frame_equal(self.frame, recons) - - self.frame.to_excel(path, 'test1', na_rep='88') - recons = read_excel(path, 'test1', index_col=0, - na_values=[88, 88.0]) - tm.assert_frame_equal(self.frame, recons) - - # GH 6573 - self.frame.to_excel(path, 'Sheet1') - recons = read_excel(path, index_col=0) - tm.assert_frame_equal(self.frame, recons) - - self.frame.to_excel(path, '0') - recons = read_excel(path, index_col=0) - tm.assert_frame_equal(self.frame, recons) - - # GH 8825 Pandas Series should provide to_excel method - s = self.frame["A"] - 
s.to_excel(path) - recons = read_excel(path, index_col=0) - tm.assert_frame_equal(s.to_frame(), recons) + self.frame['A'][:5] = nan + + self.frame.to_excel(self.path, 'test1') + self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) + self.frame.to_excel(self.path, 'test1', header=False) + self.frame.to_excel(self.path, 'test1', index=False) + + # test roundtrip + self.frame.to_excel(self.path, 'test1') + recons = read_excel(self.path, 'test1', index_col=0) + tm.assert_frame_equal(self.frame, recons) + + self.frame.to_excel(self.path, 'test1', index=False) + recons = read_excel(self.path, 'test1', index_col=None) + recons.index = self.frame.index + tm.assert_frame_equal(self.frame, recons) + + self.frame.to_excel(self.path, 'test1', na_rep='NA') + recons = read_excel(self.path, 'test1', index_col=0, na_values=['NA']) + tm.assert_frame_equal(self.frame, recons) + + # GH 3611 + self.frame.to_excel(self.path, 'test1', na_rep='88') + recons = read_excel(self.path, 'test1', index_col=0, na_values=['88']) + tm.assert_frame_equal(self.frame, recons) + + self.frame.to_excel(self.path, 'test1', na_rep='88') + recons = read_excel(self.path, 'test1', index_col=0, + na_values=[88, 88.0]) + tm.assert_frame_equal(self.frame, recons) + + # GH 6573 + self.frame.to_excel(self.path, 'Sheet1') + recons = read_excel(self.path, index_col=0) + tm.assert_frame_equal(self.frame, recons) + + self.frame.to_excel(self.path, '0') + recons = read_excel(self.path, index_col=0) + tm.assert_frame_equal(self.frame, recons) + + # GH 8825 Pandas Series should provide to_excel method + s = self.frame["A"] + s.to_excel(self.path) + recons = read_excel(self.path, index_col=0) + tm.assert_frame_equal(s.to_frame(), recons) def test_mixed(self, merge_cells, engine, ext): - with ensure_clean(ext) as path: - self.mixed_frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', index_col=0) - tm.assert_frame_equal(self.mixed_frame, recons) + 
self.mixed_frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', index_col=0) + tm.assert_frame_equal(self.mixed_frame, recons) def test_tsframe(self, merge_cells, engine, ext): df = tm.makeTimeDataFrame()[:5] - with ensure_clean(ext) as path: - df.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - tm.assert_frame_equal(df, recons) + df.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(df, recons) def test_basics_with_nan(self, merge_cells, engine, ext): - with ensure_clean(ext) as path: - self.frame['A'][:5] = nan - self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', columns=['A', 'B']) - self.frame.to_excel(path, 'test1', header=False) - self.frame.to_excel(path, 'test1', index=False) + self.frame['A'][:5] = nan + self.frame.to_excel(self.path, 'test1') + self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) + self.frame.to_excel(self.path, 'test1', header=False) + self.frame.to_excel(self.path, 'test1', index=False) @pytest.mark.parametrize("np_type", [ np.int8, np.int16, np.int32, np.int64]) def test_int_types(self, merge_cells, engine, ext, np_type): - with ensure_clean(ext) as path: - # Test np.int values read come back as int (rather than float - # which is Excel's format). 
- frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)), - dtype=np_type) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - int_frame = frame.astype(np.int64) - tm.assert_frame_equal(int_frame, recons) - recons2 = read_excel(path, 'test1') - tm.assert_frame_equal(int_frame, recons2) - - # test with convert_float=False comes back as float - float_frame = frame.astype(float) - recons = read_excel(path, 'test1', convert_float=False) - tm.assert_frame_equal(recons, float_frame, - check_index_type=False, - check_column_type=False) + # Test np.int values read come back as int (rather than float + # which is Excel's format). + frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)), + dtype=np_type) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + int_frame = frame.astype(np.int64) + tm.assert_frame_equal(int_frame, recons) + recons2 = read_excel(self.path, 'test1') + tm.assert_frame_equal(int_frame, recons2) + + # test with convert_float=False comes back as float + float_frame = frame.astype(float) + recons = read_excel(self.path, 'test1', convert_float=False) + tm.assert_frame_equal(recons, float_frame, + check_index_type=False, + check_column_type=False) @pytest.mark.parametrize("np_type", [ np.float16, np.float32, np.float64]) def test_float_types(self, merge_cells, engine, ext, np_type): - with ensure_clean(ext) as path: - # Test np.float values read come back as float. - frame = DataFrame(np.random.random_sample(10), dtype=np_type) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1').astype(np_type) - tm.assert_frame_equal(frame, recons, check_dtype=False) + # Test np.float values read come back as float. 
+ frame = DataFrame(np.random.random_sample(10), dtype=np_type) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1').astype(np_type) + tm.assert_frame_equal(frame, recons, check_dtype=False) @pytest.mark.parametrize("np_type", [np.bool8, np.bool_]) def test_bool_types(self, merge_cells, engine, ext, np_type): - with ensure_clean(ext) as path: - # Test np.bool values read come back as float. - frame = (DataFrame([1, 0, True, False], dtype=np_type)) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1').astype(np_type) - tm.assert_frame_equal(frame, recons) + # Test np.bool values read come back as float. + frame = (DataFrame([1, 0, True, False], dtype=np_type)) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1').astype(np_type) + tm.assert_frame_equal(frame, recons) def test_inf_roundtrip(self, merge_cells, engine, ext): frame = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) - with ensure_clean(ext) as path: - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - tm.assert_frame_equal(frame, recons) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(frame, recons) def test_sheets(self, merge_cells, engine, ext): - with ensure_clean(ext) as path: - self.frame['A'][:5] = nan + self.frame['A'][:5] = nan - self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', columns=['A', 'B']) - self.frame.to_excel(path, 'test1', header=False) - self.frame.to_excel(path, 'test1', index=False) + self.frame.to_excel(self.path, 'test1') + self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) + self.frame.to_excel(self.path, 'test1', header=False) + self.frame.to_excel(self.path, 'test1', index=False) - # Test writing to separate sheets - writer = ExcelWriter(path) - 
self.frame.to_excel(writer, 'test1') - self.tsframe.to_excel(writer, 'test2') - writer.save() - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', index_col=0) - tm.assert_frame_equal(self.frame, recons) - recons = read_excel(reader, 'test2', index_col=0) - tm.assert_frame_equal(self.tsframe, recons) - assert 2 == len(reader.sheet_names) - assert 'test1' == reader.sheet_names[0] - assert 'test2' == reader.sheet_names[1] + # Test writing to separate sheets + writer = ExcelWriter(self.path) + self.frame.to_excel(writer, 'test1') + self.tsframe.to_excel(writer, 'test2') + writer.save() + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', index_col=0) + tm.assert_frame_equal(self.frame, recons) + recons = read_excel(reader, 'test2', index_col=0) + tm.assert_frame_equal(self.tsframe, recons) + assert 2 == len(reader.sheet_names) + assert 'test1' == reader.sheet_names[0] + assert 'test2' == reader.sheet_names[1] def test_colaliases(self, merge_cells, engine, ext): - with ensure_clean(ext) as path: - self.frame['A'][:5] = nan - - self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', columns=['A', 'B']) - self.frame.to_excel(path, 'test1', header=False) - self.frame.to_excel(path, 'test1', index=False) - - # column aliases - col_aliases = Index(['AA', 'X', 'Y', 'Z']) - self.frame2.to_excel(path, 'test1', header=col_aliases) - reader = ExcelFile(path) - rs = read_excel(reader, 'test1', index_col=0) - xp = self.frame2.copy() - xp.columns = col_aliases - tm.assert_frame_equal(xp, rs) + self.frame['A'][:5] = nan + + self.frame.to_excel(self.path, 'test1') + self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) + self.frame.to_excel(self.path, 'test1', header=False) + self.frame.to_excel(self.path, 'test1', index=False) + + # column aliases + col_aliases = Index(['AA', 'X', 'Y', 'Z']) + self.frame2.to_excel(self.path, 'test1', header=col_aliases) + reader = ExcelFile(self.path) + rs = read_excel(reader, 'test1', 
index_col=0) + xp = self.frame2.copy() + xp.columns = col_aliases + tm.assert_frame_equal(xp, rs) def test_roundtrip_indexlabels(self, merge_cells, engine, ext): - with ensure_clean(ext) as path: - - self.frame['A'][:5] = nan - - self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', columns=['A', 'B']) - self.frame.to_excel(path, 'test1', header=False) - self.frame.to_excel(path, 'test1', index=False) - - # test index_label - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(path, 'test1', - index_label=['test'], - merge_cells=merge_cells) - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', - index_col=0, - ).astype(np.int64) - frame.index.names = ['test'] - assert frame.index.names == recons.index.names - - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(path, - 'test1', - index_label=['test', 'dummy', 'dummy2'], - merge_cells=merge_cells) - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', - index_col=0, - ).astype(np.int64) - frame.index.names = ['test'] - assert frame.index.names == recons.index.names - - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(path, - 'test1', - index_label='test', - merge_cells=merge_cells) - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', - index_col=0, - ).astype(np.int64) - frame.index.names = ['test'] - tm.assert_frame_equal(frame, recons.astype(bool)) - - with ensure_clean(ext) as path: - - self.frame.to_excel(path, - 'test1', - columns=['A', 'B', 'C', 'D'], - index=False, merge_cells=merge_cells) - # take 'A' and 'B' as indexes (same row as cols 'C', 'D') - df = self.frame.copy() - df = df.set_index(['A', 'B']) - - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', index_col=[0, 1]) - tm.assert_frame_equal(df, recons, check_less_precise=True) + self.frame['A'][:5] = nan + + self.frame.to_excel(self.path, 'test1') + self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) + 
self.frame.to_excel(self.path, 'test1', header=False) + self.frame.to_excel(self.path, 'test1', index=False) + + # test index_label + frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel(self.path, 'test1', + index_label=['test'], + merge_cells=merge_cells) + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', + index_col=0, + ).astype(np.int64) + frame.index.names = ['test'] + assert frame.index.names == recons.index.names + + frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel(self.path, + 'test1', + index_label=['test', 'dummy', 'dummy2'], + merge_cells=merge_cells) + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', + index_col=0, + ).astype(np.int64) + frame.index.names = ['test'] + assert frame.index.names == recons.index.names + + frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel(self.path, + 'test1', + index_label='test', + merge_cells=merge_cells) + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', + index_col=0, + ).astype(np.int64) + frame.index.names = ['test'] + tm.assert_frame_equal(frame, recons.astype(bool)) + + self.frame.to_excel(self.path, + 'test1', + columns=['A', 'B', 'C', 'D'], + index=False, merge_cells=merge_cells) + # take 'A' and 'B' as indexes (same row as cols 'C', 'D') + df = self.frame.copy() + df = df.set_index(['A', 'B']) + + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', index_col=[0, 1]) + tm.assert_frame_equal(df, recons, check_less_precise=True) def test_excel_roundtrip_indexname(self, merge_cells, engine, ext): df = DataFrame(np.random.randn(10, 4)) df.index.name = 'foo' - with ensure_clean(ext) as path: - df.to_excel(path, merge_cells=merge_cells) + df.to_excel(self.path, merge_cells=merge_cells) - xf = ExcelFile(path) - result = read_excel(xf, xf.sheet_names[0], - index_col=0) + xf = ExcelFile(self.path) + result = read_excel(xf, xf.sheet_names[0], + index_col=0) - tm.assert_frame_equal(result, df) - assert 
result.index.name == 'foo' + tm.assert_frame_equal(result, df) + assert result.index.name == 'foo' def test_excel_roundtrip_datetime(self, merge_cells, engine, ext): # datetime.date, not sure what to test here exactly tsf = self.tsframe.copy() - with ensure_clean(ext) as path: - tsf.index = [x.date() for x in self.tsframe.index] - tsf.to_excel(path, 'test1', merge_cells=merge_cells) - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - tm.assert_frame_equal(self.tsframe, recons) + tsf.index = [x.date() for x in self.tsframe.index] + tsf.to_excel(self.path, 'test1', merge_cells=merge_cells) + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(self.tsframe, recons) # GH4133 - excel output format strings def test_excel_date_datetime_format(self, merge_cells, engine, ext): @@ -1320,87 +1304,82 @@ def test_excel_date_datetime_format(self, merge_cells, engine, ext): datetime(2014, 2, 28, 13, 5, 13)]], index=['DATE', 'DATETIME'], columns=['X', 'Y']) - with ensure_clean(ext) as filename1: - with ensure_clean(ext) as filename2: - writer1 = ExcelWriter(filename1) - writer2 = ExcelWriter(filename2, - date_format='DD.MM.YYYY', - datetime_format='DD.MM.YYYY HH-MM-SS') + with ensure_clean(ext) as filename2: + writer1 = ExcelWriter(self.path) + writer2 = ExcelWriter(filename2, + date_format='DD.MM.YYYY', + datetime_format='DD.MM.YYYY HH-MM-SS') - df.to_excel(writer1, 'test1') - df.to_excel(writer2, 'test1') + df.to_excel(writer1, 'test1') + df.to_excel(writer2, 'test1') - writer1.close() - writer2.close() + writer1.close() + writer2.close() - reader1 = ExcelFile(filename1) - reader2 = ExcelFile(filename2) + reader1 = ExcelFile(self.path) + reader2 = ExcelFile(filename2) - rs1 = read_excel(reader1, 'test1', index_col=None) - rs2 = read_excel(reader2, 'test1', index_col=None) + rs1 = read_excel(reader1, 'test1', index_col=None) + rs2 = read_excel(reader2, 'test1', index_col=None) - tm.assert_frame_equal(rs1, rs2) + 
tm.assert_frame_equal(rs1, rs2) - # since the reader returns a datetime object for dates, we need - # to use df_expected to check the result - tm.assert_frame_equal(rs2, df_expected) + # since the reader returns a datetime object for dates, we need + # to use df_expected to check the result + tm.assert_frame_equal(rs2, df_expected) def test_to_excel_interval_no_labels(self, merge_cells, engine, ext): # GH19242 - test writing Interval without labels - with ensure_clean(ext) as path: - frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), - dtype=np.int64) - expected = frame.copy() - frame['new'] = pd.cut(frame[0], 10) - expected['new'] = pd.cut(expected[0], 10).astype(str) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - tm.assert_frame_equal(expected, recons) + frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + dtype=np.int64) + expected = frame.copy() + frame['new'] = pd.cut(frame[0], 10) + expected['new'] = pd.cut(expected[0], 10).astype(str) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(expected, recons) def test_to_excel_interval_labels(self, merge_cells, engine, ext): # GH19242 - test writing Interval with labels - with ensure_clean(ext) as path: - frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), - dtype=np.int64) - expected = frame.copy() - intervals = pd.cut(frame[0], 10, labels=['A', 'B', 'C', 'D', 'E', - 'F', 'G', 'H', 'I', 'J']) - frame['new'] = intervals - expected['new'] = pd.Series(list(intervals)) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - tm.assert_frame_equal(expected, recons) + frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + dtype=np.int64) + expected = frame.copy() + intervals = pd.cut(frame[0], 10, labels=['A', 'B', 'C', 'D', 'E', + 'F', 'G', 'H', 'I', 'J']) + frame['new'] = intervals + expected['new'] = 
pd.Series(list(intervals)) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(expected, recons) def test_to_excel_timedelta(self, merge_cells, engine, ext): # GH 19242, GH9155 - test writing timedelta to xls if engine == 'openpyxl': pytest.skip('Timedelta roundtrip broken with openpyxl') - with ensure_clean(ext) as path: - frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), - columns=['A'], - dtype=np.int64 - ) - expected = frame.copy() - frame['new'] = frame['A'].apply(lambda x: timedelta(seconds=x)) - expected['new'] = expected['A'].apply( - lambda x: timedelta(seconds=x).total_seconds() / float(86400)) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - tm.assert_frame_equal(expected, recons) + frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + columns=['A'], + dtype=np.int64 + ) + expected = frame.copy() + frame['new'] = frame['A'].apply(lambda x: timedelta(seconds=x)) + expected['new'] = expected['A'].apply( + lambda x: timedelta(seconds=x).total_seconds() / float(86400)) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(expected, recons) def test_to_excel_periodindex(self, merge_cells, engine, ext): frame = self.tsframe xp = frame.resample('M', kind='period').mean() - with ensure_clean(ext) as path: - xp.to_excel(path, 'sht1') + xp.to_excel(self.path, 'sht1') - reader = ExcelFile(path) - rs = read_excel(reader, 'sht1', index_col=0) - tm.assert_frame_equal(xp, rs.to_period('M')) + reader = ExcelFile(self.path) + rs = read_excel(reader, 'sht1', index_col=0) + tm.assert_frame_equal(xp, rs.to_period('M')) def test_to_excel_multiindex(self, merge_cells, engine, ext): frame = self.frame @@ -1409,15 +1388,14 @@ def test_to_excel_multiindex(self, merge_cells, engine, ext): names=['first', 'second']) frame.index = new_index - with 
ensure_clean(ext) as path: - frame.to_excel(path, 'test1', header=False) - frame.to_excel(path, 'test1', columns=['A', 'B']) + frame.to_excel(self.path, 'test1', header=False) + frame.to_excel(self.path, 'test1', columns=['A', 'B']) - # round trip - frame.to_excel(path, 'test1', merge_cells=merge_cells) - reader = ExcelFile(path) - df = read_excel(reader, 'test1', index_col=[0, 1]) - tm.assert_frame_equal(frame, df) + # round trip + frame.to_excel(self.path, 'test1', merge_cells=merge_cells) + reader = ExcelFile(self.path) + df = read_excel(reader, 'test1', index_col=[0, 1]) + tm.assert_frame_equal(frame, df) # GH13511 def test_to_excel_multiindex_nan_label(self, merge_cells, engine, ext): @@ -1426,10 +1404,9 @@ def test_to_excel_multiindex_nan_label(self, merge_cells, engine, ext): 'C': np.random.sample(3)}) frame = frame.set_index(['A', 'B']) - with ensure_clean(ext) as path: - frame.to_excel(path, merge_cells=merge_cells) - df = read_excel(path, index_col=[0, 1]) - tm.assert_frame_equal(frame, df) + frame.to_excel(self.path, merge_cells=merge_cells) + df = read_excel(self.path, index_col=[0, 1]) + tm.assert_frame_equal(frame, df) # Test for Issue 11328. 
If column indices are integers, make # sure they are handled correctly for either setting of @@ -1448,17 +1425,16 @@ def test_to_excel_multiindex_cols(self, merge_cells, engine, ext): if not merge_cells: header = 0 - with ensure_clean(ext) as path: - # round trip - frame.to_excel(path, 'test1', merge_cells=merge_cells) - reader = ExcelFile(path) - df = read_excel(reader, 'test1', header=header, - index_col=[0, 1]) - if not merge_cells: - fm = frame.columns.format(sparsify=False, - adjoin=False, names=False) - frame.columns = [".".join(map(str, q)) for q in zip(*fm)] - tm.assert_frame_equal(frame, df) + # round trip + frame.to_excel(self.path, 'test1', merge_cells=merge_cells) + reader = ExcelFile(self.path) + df = read_excel(reader, 'test1', header=header, + index_col=[0, 1]) + if not merge_cells: + fm = frame.columns.format(sparsify=False, + adjoin=False, names=False) + frame.columns = [".".join(map(str, q)) for q in zip(*fm)] + tm.assert_frame_equal(frame, df) def test_to_excel_multiindex_dates(self, merge_cells, engine, ext): # try multiindex with dates @@ -1466,15 +1442,14 @@ def test_to_excel_multiindex_dates(self, merge_cells, engine, ext): new_index = [tsframe.index, np.arange(len(tsframe.index))] tsframe.index = MultiIndex.from_arrays(new_index) - with ensure_clean(ext) as path: - tsframe.index.names = ['time', 'foo'] - tsframe.to_excel(path, 'test1', merge_cells=merge_cells) - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', - index_col=[0, 1]) + tsframe.index.names = ['time', 'foo'] + tsframe.to_excel(self.path, 'test1', merge_cells=merge_cells) + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', + index_col=[0, 1]) - tm.assert_frame_equal(tsframe, recons) - assert recons.index.names == ('time', 'foo') + tm.assert_frame_equal(tsframe, recons) + assert recons.index.names == ('time', 'foo') def test_to_excel_multiindex_no_write_index(self, merge_cells, engine, ext): @@ -1488,32 +1463,29 @@ def 
test_to_excel_multiindex_no_write_index(self, merge_cells, engine, multi_index = MultiIndex.from_tuples([(70, 80), (90, 100)]) frame2.index = multi_index - with ensure_clean(ext) as path: + # Write out to Excel without the index. + frame2.to_excel(self.path, 'test1', index=False) - # Write out to Excel without the index. - frame2.to_excel(path, 'test1', index=False) + # Read it back in. + reader = ExcelFile(self.path) + frame3 = read_excel(reader, 'test1') - # Read it back in. - reader = ExcelFile(path) - frame3 = read_excel(reader, 'test1') - - # Test that it is the same as the initial frame. - tm.assert_frame_equal(frame1, frame3) + # Test that it is the same as the initial frame. + tm.assert_frame_equal(frame1, frame3) def test_to_excel_float_format(self, merge_cells, engine, ext): df = DataFrame([[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], index=['A', 'B'], columns=['X', 'Y', 'Z']) - with ensure_clean(ext) as filename: - df.to_excel(filename, 'test1', float_format='%.2f') + df.to_excel(self.path, 'test1', float_format='%.2f') - reader = ExcelFile(filename) - rs = read_excel(reader, 'test1', index_col=None) - xp = DataFrame([[0.12, 0.23, 0.57], - [12.32, 123123.20, 321321.20]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - tm.assert_frame_equal(rs, xp) + reader = ExcelFile(self.path) + rs = read_excel(reader, 'test1', index_col=None) + xp = DataFrame([[0.12, 0.23, 0.57], + [12.32, 123123.20, 321321.20]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + tm.assert_frame_equal(rs, xp) def test_to_excel_output_encoding(self, merge_cells, engine, ext): # avoid mixed inferred_type @@ -1659,12 +1631,11 @@ def test_excel_010_hemstring(self, merge_cells, engine, ext): def roundtrip(df, header=True, parser_hdr=0, index=True): - with ensure_clean(ext) as path: - df.to_excel(path, header=header, - merge_cells=merge_cells, index=index) - xf = ExcelFile(path) - res = read_excel(xf, xf.sheet_names[0], header=parser_hdr) - return res + df.to_excel(self.path, 
header=header, + merge_cells=merge_cells, index=index) + xf = ExcelFile(self.path) + res = read_excel(xf, xf.sheet_names[0], header=parser_hdr) + return res nrows = 5 ncols = 3 @@ -1713,12 +1684,11 @@ def test_excel_010_hemstring_raises_NotImplementedError(self, merge_cells, def roundtrip2(df, header=True, parser_hdr=0, index=True): - with ensure_clean(ext) as path: - df.to_excel(path, header=header, - merge_cells=merge_cells, index=index) - xf = ExcelFile(path) - res = read_excel(xf, xf.sheet_names[0], header=parser_hdr) - return res + df.to_excel(self.path, header=header, + merge_cells=merge_cells, index=index) + xf = ExcelFile(self.path) + res = read_excel(xf, xf.sheet_names[0], header=parser_hdr) + return res nrows = 5 ncols = 3 @@ -1730,120 +1700,113 @@ def roundtrip2(df, header=True, parser_hdr=0, index=True): def test_duplicated_columns(self, merge_cells, engine, ext): # Test for issue #5235 - with ensure_clean(ext) as path: - write_frame = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]]) - colnames = ['A', 'B', 'B'] - - write_frame.columns = colnames - write_frame.to_excel(path, 'test1') - - read_frame = read_excel(path, 'test1') - read_frame.columns = colnames - tm.assert_frame_equal(write_frame, read_frame) - - # 11007 / #10970 - write_frame = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], - columns=['A', 'B', 'A', 'B']) - write_frame.to_excel(path, 'test1') - read_frame = read_excel(path, 'test1') - read_frame.columns = ['A', 'B', 'A', 'B'] - tm.assert_frame_equal(write_frame, read_frame) - - # 10982 - write_frame.to_excel(path, 'test1', index=False, header=False) - read_frame = read_excel(path, 'test1', header=None) - write_frame.columns = [0, 1, 2, 3] - tm.assert_frame_equal(write_frame, read_frame) + write_frame = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]]) + colnames = ['A', 'B', 'B'] + + write_frame.columns = colnames + write_frame.to_excel(self.path, 'test1') + + read_frame = read_excel(self.path, 'test1') + read_frame.columns = colnames + 
tm.assert_frame_equal(write_frame, read_frame) + + # 11007 / #10970 + write_frame = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], + columns=['A', 'B', 'A', 'B']) + write_frame.to_excel(self.path, 'test1') + read_frame = read_excel(self.path, 'test1') + read_frame.columns = ['A', 'B', 'A', 'B'] + tm.assert_frame_equal(write_frame, read_frame) + + # 10982 + write_frame.to_excel(self.path, 'test1', index=False, header=False) + read_frame = read_excel(self.path, 'test1', header=None) + write_frame.columns = [0, 1, 2, 3] + tm.assert_frame_equal(write_frame, read_frame) def test_swapped_columns(self, merge_cells, engine, ext): # Test for issue #5427. - with ensure_clean(ext) as path: - write_frame = DataFrame({'A': [1, 1, 1], - 'B': [2, 2, 2]}) - write_frame.to_excel(path, 'test1', columns=['B', 'A']) + write_frame = DataFrame({'A': [1, 1, 1], + 'B': [2, 2, 2]}) + write_frame.to_excel(self.path, 'test1', columns=['B', 'A']) - read_frame = read_excel(path, 'test1', header=0) + read_frame = read_excel(self.path, 'test1', header=0) - tm.assert_series_equal(write_frame['A'], read_frame['A']) - tm.assert_series_equal(write_frame['B'], read_frame['B']) + tm.assert_series_equal(write_frame['A'], read_frame['A']) + tm.assert_series_equal(write_frame['B'], read_frame['B']) def test_invalid_columns(self, merge_cells, engine, ext): # 10982 - with ensure_clean(ext) as path: - write_frame = DataFrame({'A': [1, 1, 1], - 'B': [2, 2, 2]}) + write_frame = DataFrame({'A': [1, 1, 1], + 'B': [2, 2, 2]}) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - write_frame.to_excel(path, 'test1', columns=['B', 'C']) - expected = write_frame.reindex(columns=['B', 'C']) - read_frame = read_excel(path, 'test1') - tm.assert_frame_equal(expected, read_frame) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + write_frame.to_excel(self.path, 'test1', columns=['B', 'C']) + expected = write_frame.reindex(columns=['B', 'C']) + read_frame = read_excel(self.path, 
'test1') + tm.assert_frame_equal(expected, read_frame) - with pytest.raises(KeyError): - write_frame.to_excel(path, 'test1', columns=['C', 'D']) + with pytest.raises(KeyError): + write_frame.to_excel(self.path, 'test1', columns=['C', 'D']) def test_comment_arg(self, merge_cells, engine, ext): # Re issue #18735 # Test the comment argument functionality to read_excel - with ensure_clean(ext) as path: - # Create file to read in - df = DataFrame({'A': ['one', '#one', 'one'], - 'B': ['two', 'two', '#two']}) - df.to_excel(path, 'test_c') + # Create file to read in + df = DataFrame({'A': ['one', '#one', 'one'], + 'B': ['two', 'two', '#two']}) + df.to_excel(self.path, 'test_c') - # Read file without comment arg - result1 = read_excel(path, 'test_c') - result1.iloc[1, 0] = None - result1.iloc[1, 1] = None - result1.iloc[2, 1] = None - result2 = read_excel(path, 'test_c', comment='#') - tm.assert_frame_equal(result1, result2) + # Read file without comment arg + result1 = read_excel(self.path, 'test_c') + result1.iloc[1, 0] = None + result1.iloc[1, 1] = None + result1.iloc[2, 1] = None + result2 = read_excel(self.path, 'test_c', comment='#') + tm.assert_frame_equal(result1, result2) def test_comment_default(self, merge_cells, engine, ext): # Re issue #18735 # Test the comment argument default to read_excel - with ensure_clean(ext) as path: - # Create file to read in - df = DataFrame({'A': ['one', '#one', 'one'], - 'B': ['two', 'two', '#two']}) - df.to_excel(path, 'test_c') + # Create file to read in + df = DataFrame({'A': ['one', '#one', 'one'], + 'B': ['two', 'two', '#two']}) + df.to_excel(self.path, 'test_c') - # Read file with default and explicit comment=None - result1 = read_excel(path, 'test_c') - result2 = read_excel(path, 'test_c', comment=None) - tm.assert_frame_equal(result1, result2) + # Read file with default and explicit comment=None + result1 = read_excel(self.path, 'test_c') + result2 = read_excel(self.path, 'test_c', comment=None) + 
tm.assert_frame_equal(result1, result2) def test_comment_used(self, merge_cells, engine, ext): # Re issue #18735 # Test the comment argument is working as expected when used - with ensure_clean(ext) as path: - # Create file to read in - df = DataFrame({'A': ['one', '#one', 'one'], - 'B': ['two', 'two', '#two']}) - df.to_excel(path, 'test_c') + # Create file to read in + df = DataFrame({'A': ['one', '#one', 'one'], + 'B': ['two', 'two', '#two']}) + df.to_excel(self.path, 'test_c') - # Test read_frame_comment against manually produced expected output - expected = DataFrame({'A': ['one', None, 'one'], - 'B': ['two', None, None]}) - result = read_excel(path, 'test_c', comment='#') - tm.assert_frame_equal(result, expected) + # Test read_frame_comment against manually produced expected output + expected = DataFrame({'A': ['one', None, 'one'], + 'B': ['two', None, None]}) + result = read_excel(self.path, 'test_c', comment='#') + tm.assert_frame_equal(result, expected) def test_comment_emptyline(self, merge_cells, engine, ext): # Re issue #18735 # Test that read_excel ignores commented lines at the end of file - with ensure_clean(ext) as path: - df = DataFrame({'a': ['1', '#2'], 'b': ['2', '3']}) - df.to_excel(path, index=False) + df = DataFrame({'a': ['1', '#2'], 'b': ['2', '3']}) + df.to_excel(self.path, index=False) - # Test that all-comment lines at EoF are ignored - expected = DataFrame({'a': [1], 'b': [2]}) - result = read_excel(path, comment='#') - tm.assert_frame_equal(result, expected) + # Test that all-comment lines at EoF are ignored + expected = DataFrame({'a': [1], 'b': [2]}) + result = read_excel(self.path, comment='#') + tm.assert_frame_equal(result, expected) def test_datetimes(self, merge_cells, engine, ext): @@ -1860,12 +1823,11 @@ def test_datetimes(self, merge_cells, engine, ext): datetime(2013, 1, 13, 16, 37, 0), datetime(2013, 1, 13, 18, 20, 52)] - with ensure_clean(ext) as path: - write_frame = DataFrame({'A': datetimes}) - write_frame.to_excel(path, 
'Sheet1') - read_frame = read_excel(path, 'Sheet1', header=0) + write_frame = DataFrame({'A': datetimes}) + write_frame.to_excel(self.path, 'Sheet1') + read_frame = read_excel(self.path, 'Sheet1', header=0) - tm.assert_series_equal(write_frame['A'], read_frame['A']) + tm.assert_series_equal(write_frame['A'], read_frame['A']) # GH7074 def test_bytes_io(self, merge_cells, engine, ext): @@ -1887,29 +1849,28 @@ def test_write_lists_dict(self, merge_cells, engine, ext): expected = df.copy() expected.mixed = expected.mixed.apply(str) expected.numeric = expected.numeric.astype('int64') - with ensure_clean(ext) as path: - df.to_excel(path, 'Sheet1') - read = read_excel(path, 'Sheet1', header=0) - tm.assert_frame_equal(read, expected) + + df.to_excel(self.path, 'Sheet1') + read = read_excel(self.path, 'Sheet1', header=0) + tm.assert_frame_equal(read, expected) # GH13347 def test_true_and_false_value_options(self, merge_cells, engine, ext): df = pd.DataFrame([['foo', 'bar']], columns=['col1', 'col2']) expected = df.replace({'foo': True, 'bar': False}) - with ensure_clean(ext) as path: - df.to_excel(path) - read_frame = read_excel(path, true_values=['foo'], - false_values=['bar']) - tm.assert_frame_equal(read_frame, expected) + + df.to_excel(self.path) + read_frame = read_excel(self.path, true_values=['foo'], + false_values=['bar']) + tm.assert_frame_equal(read_frame, expected) def test_freeze_panes(self, merge_cells, engine, ext): # GH15160 expected = DataFrame([[1, 2], [3, 4]], columns=['col1', 'col2']) - with ensure_clean(ext) as path: - expected.to_excel(path, "Sheet1", freeze_panes=(1, 1)) - result = read_excel(path) - tm.assert_frame_equal(expected, result) + expected.to_excel(self.path, "Sheet1", freeze_panes=(1, 1)) + result = read_excel(self.path) + tm.assert_frame_equal(expected, result) def test_path_pathlib(self, merge_cells, engine, ext): df = tm.makeDataFrame() From 164ee9c32a42c7910b92749af51d9b09d58f4f2e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 24 
Feb 2018 16:50:15 -0800 Subject: [PATCH 16/19] Changed pytest.skip to pytest.xfail for visibility --- pandas/tests/io/test_excel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index f7a921ed62f56..cb6ef4a828406 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1357,7 +1357,7 @@ def test_to_excel_interval_labels(self, merge_cells, engine, ext): def test_to_excel_timedelta(self, merge_cells, engine, ext): # GH 19242, GH9155 - test writing timedelta to xls if engine == 'openpyxl': - pytest.skip('Timedelta roundtrip broken with openpyxl') + pytest.xfail('Timedelta roundtrip broken with openpyxl') frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), columns=['A'], dtype=np.int64 From 849933d2c2876d7e7bac3e44969cf27eb82c9e1f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 25 Feb 2018 15:40:47 -0800 Subject: [PATCH 17/19] Updated docstring and name of class test fixture --- pandas/tests/io/test_excel.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index cb6ef4a828406..adfbff1afbd5d 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1009,17 +1009,32 @@ def test_read_excel_squeeze(self, ext): class _WriterBase(SharedItems): - """Provides fixture to set / reset options for all writer tests""" @pytest.fixture(autouse=True) - def set_options(self, request, merge_cells, engine, ext): + def set_engine_and_path(self, request, merge_cells, engine, ext): + """Fixture to set engine and open file for use in each test case + + Rather than requiring `engine=...` to be provided explicitly as an + argument in each test, this fixture sets a global option to dictate + which engine should be used to write Excel files. After executing + the test it rolls back said change to the global option.
+ + It also uses a context manager to open a temporary excel file for + the function to write to, accessible via `self.path` + + Notes + ----- + This fixture will run as part of each test method defined in the + class and any subclasses, on account of the `autouse=True` + argument + """ option_name = 'io.excel.{ext}.writer'.format(ext=ext.strip('.')) prev_engine = get_option(option_name) set_option(option_name, engine) with ensure_clean(ext) as path: self.path = path yield - set_option(option_name, prev_engine) + set_option(option_name, prev_engine) # Roll back option change @pytest.mark.parametrize("merge_cells", [True, False]) From 112dae46ece63a66ede24430da64980ba9d63749 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 25 Feb 2018 18:23:28 -0800 Subject: [PATCH 18/19] Parametrized .xlsm writing tests --- pandas/tests/io/test_excel.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index adfbff1afbd5d..e1e2622550504 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1041,6 +1041,8 @@ class and any subclasses, on account of the `autouse=True` @pytest.mark.parametrize("engine,ext", [ pytest.param('openpyxl', '.xlsx', marks=pytest.mark.skipif( not td.safe_import('openpyxl'), reason='No openpyxl')), + pytest.param('openpyxl', '.xlsm', marks=pytest.mark.skipif( + not td.safe_import('openpyxl'), reason='No openpyxl')), pytest.param('xlwt', '.xls', marks=pytest.mark.skipif( not td.safe_import('xlwt'), reason='No xlwt')), pytest.param('xlsxwriter', '.xlsx', marks=pytest.mark.skipif( From 872e7d6b969c2fd8d8b482f388802f1dd2d17943 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 25 Feb 2018 18:31:05 -0800 Subject: [PATCH 19/19] Added xfail for Py2, Linux, xlsxwriter combo --- pandas/tests/io/test_excel.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index e1e2622550504..fdf9954285db8 100644 --- 
a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1375,6 +1375,9 @@ def test_to_excel_timedelta(self, merge_cells, engine, ext): # GH 19242, GH9155 - test writing timedelta to xls if engine == 'openpyxl': pytest.xfail('Timedelta roundtrip broken with openpyxl') + if engine == 'xlsxwriter' and (sys.version_info[0] == 2 and + sys.platform.startswith('linux')): + pytest.xfail('Not working on linux with Py2 and xlsxwriter') frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), columns=['A'], dtype=np.int64