BUG: to_clipboard fails to format output for Excel (#21111)

pandas-dev · Jun 29, 2018 · dc45fba · dc45fba
1 parent 0b63e81
commit dc45fba
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 22 deletions.
diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
@@ -57,6 +57,7 @@ Fixed Regressions
 - Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`)
 - Fixed regression in unary negative operations with object dtype (:issue:`21380`)
 - Bug in :meth:`Timestamp.ceil` and :meth:`Timestamp.floor` when timestamp is a multiple of the rounding frequency (:issue:`21262`)
+- Fixed regression in :func:`to_clipboard` that defaulted to copying dataframes as space-delimited instead of tab-delimited text (:issue:`21104`)
 
 .. _whatsnew_0232.performance:
 

diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py
@@ -1,6 +1,7 @@
 """ io on the clipboard """
 from pandas import compat, get_option, option_context, DataFrame
-from pandas.compat import StringIO, PY2
+from pandas.compat import StringIO, PY2, PY3
+import warnings
 
 
 def read_clipboard(sep=r'\s+', **kwargs):  # pragma: no cover
@@ -32,7 +33,7 @@ def read_clipboard(sep=r'\s+', **kwargs):  # pragma: no cover
 
     # try to decode (if needed on PY3)
     # Strange. linux py33 doesn't complain, win py33 does
-    if compat.PY3:
+    if PY3:
         try:
             text = compat.bytes_to_str(
                 text, encoding=(kwargs.get('encoding') or
@@ -55,11 +56,27 @@ def read_clipboard(sep=r'\s+', **kwargs):  # pragma: no cover
 
     counts = {x.lstrip().count('\t') for x in lines}
     if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
-        sep = r'\t'
+        sep = '\t'
 
+    # Edge case where sep is specified to be None, return to default
     if sep is None and kwargs.get('delim_whitespace') is None:
         sep = r'\s+'
 
+    # Regex separator currently only works with python engine.
+    # Default to python if separator is multi-character (regex)
+    if len(sep) > 1 and kwargs.get('engine') is None:
+        kwargs['engine'] = 'python'
+    elif len(sep) > 1 and kwargs.get('engine') == 'c':
+        warnings.warn('read_clipboard with regex separator does not work'
+                      ' properly with c engine')
+
+    # In PY2, the c table reader first encodes text with UTF-8 but Python
+    # table reader uses the format of the passed string. For consistency,
+    # encode strings for python engine so that output from python and c
+    # engines produce consistent results
+    if kwargs.get('engine') == 'python' and PY2:
+        text = text.encode('utf-8')
+
     return read_table(StringIO(text), sep=sep, **kwargs)
 
 
@@ -99,7 +116,7 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs):  # pragma: no cover
     if excel:
         try:
             if sep is None:
-                sep = r'\t'
+                sep = '\t'
             buf = StringIO()
             # clipboard_set (pyperclip) expects unicode
             obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs)
@@ -108,8 +125,11 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs):  # pragma: no cover
                 text = text.decode('utf-8')
             clipboard_set(text)
             return
-        except:
-            pass
+        except TypeError:
+            warnings.warn('to_clipboard in excel mode requires a single '
+                          'character separator.')
+    elif sep is not None:
+        warnings.warn('to_clipboard with excel=False ignores the sep argument')
 
     if isinstance(obj, DataFrame):
         # str(df) has various unhelpful defaults, like truncation

diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py
@@ -88,8 +88,6 @@ def check_round_trip_frame(self, data, excel=None, sep=None,
         tm.assert_frame_equal(data, result, check_dtype=False)
 
     # Test that default arguments copy as tab delimited
-    @pytest.mark.xfail(reason='to_clipboard defaults to space delim. '
-                       'Issue in #21104, Fixed in #21111')
     def test_round_trip_frame(self, df):
         self.check_round_trip_frame(df)
 
@@ -99,10 +97,6 @@ def test_round_trip_frame_sep(self, df, sep):
         self.check_round_trip_frame(df, sep=sep)
 
     # Test white space separator
-    @pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes "
-                       "aren't handled correctly in default c engine. Fixed "
-                       "in #21111 by defaulting to python engine for "
-                       "whitespace separator")
     def test_round_trip_frame_string(self, df):
         df.to_clipboard(excel=False, sep=None)
         result = read_clipboard()
@@ -111,21 +105,17 @@ def test_round_trip_frame_string(self, df):
 
     # Two character separator is not supported in to_clipboard
     # Test that multi-character separators are not silently passed
-    @pytest.mark.xfail(reason="Not yet implemented.  Fixed in #21111")
     def test_excel_sep_warning(self, df):
         with tm.assert_produces_warning():
             df.to_clipboard(excel=True, sep=r'\t')
 
     # Separator is ignored when excel=False and should produce a warning
-    @pytest.mark.xfail(reason="Not yet implemented.  Fixed in #21111")
     def test_copy_delim_warning(self, df):
         with tm.assert_produces_warning():
             df.to_clipboard(excel=False, sep='\t')
 
     # Tests that the default behavior of to_clipboard is tab
     # delimited and excel="True"
-    @pytest.mark.xfail(reason="to_clipboard defaults to space delim. Issue in "
-                       "#21104, Fixed in #21111")
     @pytest.mark.parametrize('sep', ['\t', None, 'default'])
     @pytest.mark.parametrize('excel', [True, None, 'default'])
     def test_clipboard_copy_tabs_default(self, sep, excel, df):
@@ -139,10 +129,6 @@ def test_clipboard_copy_tabs_default(self, sep, excel, df):
             assert clipboard_get() == df.to_csv(sep='\t')
 
     # Tests reading of white space separated tables
-    @pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes "
-                       "aren't handled correctly. in default c engine. Fixed "
-                       "in #21111 by defaulting to python engine for "
-                       "whitespace separator")
     @pytest.mark.parametrize('sep', [None, 'default'])
     @pytest.mark.parametrize('excel', [False])
     def test_clipboard_copy_strings(self, sep, excel, df):
@@ -193,8 +179,6 @@ def test_invalid_encoding(self, df):
         with pytest.raises(NotImplementedError):
             pd.read_clipboard(encoding='ascii')
 
-    @pytest.mark.xfail(reason='to_clipboard defaults to space delim. '
-                       'Issue in #21104, Fixed in #21111')
     @pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8'])
     def test_round_trip_valid_encodings(self, enc, df):
         self.check_round_trip_frame(df, encoding=enc)