Skip to content

Commit

Permalink
BUG: to_clipboard fails to format output for Excel (#21111)
Browse files Browse the repository at this point in the history
  • Loading branch information
david-liu-brattle-1 authored and jorisvandenbossche committed Jun 29, 2018
1 parent 0b63e81 commit dc45fba
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 22 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ Fixed Regressions
- Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`)
- Fixed regression in unary negative operations with object dtype (:issue:`21380`)
- Bug in :meth:`Timestamp.ceil` and :meth:`Timestamp.floor` when timestamp is a multiple of the rounding frequency (:issue:`21262`)
- Fixed regression in :func:`to_clipboard` that defaulted to copying dataframes as space-delimited rather than tab-delimited text (:issue:`21104`)

.. _whatsnew_0232.performance:

Expand Down
32 changes: 26 additions & 6 deletions pandas/io/clipboards.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
""" io on the clipboard """
from pandas import compat, get_option, option_context, DataFrame
from pandas.compat import StringIO, PY2
from pandas.compat import StringIO, PY2, PY3
import warnings


def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover
Expand Down Expand Up @@ -32,7 +33,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover

# try to decode (if needed on PY3)
# Strange. linux py33 doesn't complain, win py33 does
if compat.PY3:
if PY3:
try:
text = compat.bytes_to_str(
text, encoding=(kwargs.get('encoding') or
Expand All @@ -55,11 +56,27 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover

counts = {x.lstrip().count('\t') for x in lines}
if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
sep = r'\t'
sep = '\t'

# Edge case where sep is specified to be None, return to default
if sep is None and kwargs.get('delim_whitespace') is None:
sep = r'\s+'

# Regex separators currently only work with the python engine.
# Default to the python engine if the separator is multi-character (regex)
if len(sep) > 1 and kwargs.get('engine') is None:
kwargs['engine'] = 'python'
elif len(sep) > 1 and kwargs.get('engine') == 'c':
warnings.warn('read_clipboard with regex separator does not work'
' properly with c engine')

# In PY2, the c table reader first encodes text with UTF-8, whereas the
# python table reader uses the text in whatever form it was passed.
# Encode the text as UTF-8 for the python engine as well, so that the
# python and c engines produce consistent results
if kwargs.get('engine') == 'python' and PY2:
text = text.encode('utf-8')

return read_table(StringIO(text), sep=sep, **kwargs)


Expand Down Expand Up @@ -99,7 +116,7 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover
if excel:
try:
if sep is None:
sep = r'\t'
sep = '\t'
buf = StringIO()
# clipboard_set (pyperclip) expects unicode
obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs)
Expand All @@ -108,8 +125,11 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover
text = text.decode('utf-8')
clipboard_set(text)
return
except:
pass
except TypeError:
warnings.warn('to_clipboard in excel mode requires a single '
'character separator.')
elif sep is not None:
warnings.warn('to_clipboard with excel=False ignores the sep argument')

if isinstance(obj, DataFrame):
# str(df) has various unhelpful defaults, like truncation
Expand Down
16 changes: 0 additions & 16 deletions pandas/tests/io/test_clipboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,6 @@ def check_round_trip_frame(self, data, excel=None, sep=None,
tm.assert_frame_equal(data, result, check_dtype=False)

# Test that default arguments copy as tab delimited
@pytest.mark.xfail(reason='to_clipboard defaults to space delim. '
'Issue in #21104, Fixed in #21111')
def test_round_trip_frame(self, df):
self.check_round_trip_frame(df)

Expand All @@ -99,10 +97,6 @@ def test_round_trip_frame_sep(self, df, sep):
self.check_round_trip_frame(df, sep=sep)

# Test white space separator
@pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes "
"aren't handled correctly in default c engine. Fixed "
"in #21111 by defaulting to python engine for "
"whitespace separator")
def test_round_trip_frame_string(self, df):
df.to_clipboard(excel=False, sep=None)
result = read_clipboard()
Expand All @@ -111,21 +105,17 @@ def test_round_trip_frame_string(self, df):

# Two character separator is not supported in to_clipboard
# Test that multi-character separators are not silently passed
@pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111")
def test_excel_sep_warning(self, df):
with tm.assert_produces_warning():
df.to_clipboard(excel=True, sep=r'\t')

# Separator is ignored when excel=False and should produce a warning
@pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111")
def test_copy_delim_warning(self, df):
with tm.assert_produces_warning():
df.to_clipboard(excel=False, sep='\t')

# Tests that the default behavior of to_clipboard is tab
# delimited and excel="True"
@pytest.mark.xfail(reason="to_clipboard defaults to space delim. Issue in "
"#21104, Fixed in #21111")
@pytest.mark.parametrize('sep', ['\t', None, 'default'])
@pytest.mark.parametrize('excel', [True, None, 'default'])
def test_clipboard_copy_tabs_default(self, sep, excel, df):
Expand All @@ -139,10 +129,6 @@ def test_clipboard_copy_tabs_default(self, sep, excel, df):
assert clipboard_get() == df.to_csv(sep='\t')

# Tests reading of white space separated tables
@pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes "
"aren't handled correctly. in default c engine. Fixed "
"in #21111 by defaulting to python engine for "
"whitespace separator")
@pytest.mark.parametrize('sep', [None, 'default'])
@pytest.mark.parametrize('excel', [False])
def test_clipboard_copy_strings(self, sep, excel, df):
Expand Down Expand Up @@ -193,8 +179,6 @@ def test_invalid_encoding(self, df):
with pytest.raises(NotImplementedError):
pd.read_clipboard(encoding='ascii')

@pytest.mark.xfail(reason='to_clipboard defaults to space delim. '
'Issue in #21104, Fixed in #21111')
@pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8'])
def test_round_trip_valid_encodings(self, enc, df):
self.check_round_trip_frame(df, encoding=enc)

0 comments on commit dc45fba

Please sign in to comment.