From 06557db9d5f665f2cd2d473077c6a887a7ff0dd9 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sat, 22 Mar 2014 16:41:53 +0900 Subject: [PATCH] dataframe bar plot can now accept align keyword --- doc/source/release.rst | 10 +- doc/source/v0.14.0.txt | 8 +- pandas/tests/test_graphics.py | 211 ++++++++++++++++++++-------------- pandas/tools/plotting.py | 34 ++++-- 4 files changed, 164 insertions(+), 99 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 6d7751266036b..5134130ba7865 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -135,10 +135,16 @@ API Changes the index, rather than requiring a list of tuple (:issue:`4370`) - Fix a bug where invalid eval/query operations would blow the stack (:issue:`5198`) + - Following keywords are now acceptable for :meth:`DataFrame.plot(kind='bar')` and :meth:`DataFrame.plot(kind='barh')`. - - `width`: Specify the bar width. In previous versions, static value 0.5 was passed to matplotlib and it cannot be overwritten. + + - `width`: Specify the bar width. In previous versions, static value 0.5 was passed to matplotlib and it cannot be overwritten. (:issue:`6604`) + + - `align`: Specify the bar alignment. Default is `center` (different from matplotlib). In previous versions, pandas passed `align='edge'` to matplotlib and adjusted the location to `center` by itself, so the `align` keyword was not applied as expected. (:issue:`4525`) + - `position`: Specify relative alignments for bar plot layout. From 0 (left/bottom-end) to 1(right/top-end). Default is 0.5 (center). 
(:issue:`6604`) - - Define and document the order of column vs index names in query/eval + +- Define and document the order of column vs index names in query/eval (:issue:`6676`) Deprecations diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 0b94b8e44a0dc..95537878871b1 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -174,9 +174,15 @@ These are out-of-bounds selections df_multi.set_index([df_multi.index, df_multi.index]) - Following keywords are now acceptable for :meth:`DataFrame.plot(kind='bar')` and :meth:`DataFrame.plot(kind='barh')`. - - `width`: Specify the bar width. In previous versions, static value 0.5 was passed to matplotlib and it cannot be overwritten. + + - `width`: Specify the bar width. In previous versions, static value 0.5 was passed to matplotlib and it cannot be overwritten. (:issue:`6604`) + + - `align`: Specify the bar alignment. Default is `center` (different from matplotlib). In previous versions, pandas passed `align='edge'` to matplotlib and adjusted the location to `center` by itself, so the `align` keyword was not applied as expected. (:issue:`4525`) + - `position`: Specify relative alignments for bar plot layout. From 0 (left/bottom-end) to 1(right/top-end). Default is 0.5 (center). (:issue:`6604`) + Because of the default `align` value changes, coordinates of bar plots are now located on integer values (0.0, 1.0, 2.0 ...). This is intended to make bar plots be located on the same coordinates as line plots. However, bar plots may differ unexpectedly when you manually adjust the bar location or drawing area, such as using `set_xlim`, `set_ylim`, etc. In these cases, please modify your script to match the new coordinates. 
+ MultiIndexing Using Slicers ~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index fd0463ccd7ba0..7b37cf09d5638 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -615,47 +615,12 @@ def test_bar_barwidth(self): @slow def test_bar_barwidth_position(self): df = DataFrame(randn(5, 5)) - - width = 0.9 - position = 0.2 - - # regular - ax = df.plot(kind='bar', width=width, position=position) - p = ax.patches[0] - self.assertEqual(ax.xaxis.get_ticklocs()[0], - p.get_x() + p.get_width() * position * len(df.columns)) - - # stacked - ax = df.plot(kind='bar', stacked=True, width=width, position=position) - p = ax.patches[0] - self.assertEqual(ax.xaxis.get_ticklocs()[0], - p.get_x() + p.get_width() * position) - - # horizontal regular - ax = df.plot(kind='barh', width=width, position=position) - p = ax.patches[0] - self.assertEqual(ax.yaxis.get_ticklocs()[0], - p.get_y() + p.get_height() * position * len(df.columns)) - - # horizontal stacked - ax = df.plot(kind='barh', stacked=True, width=width, position=position) - p = ax.patches[0] - self.assertEqual(ax.yaxis.get_ticklocs()[0], - p.get_y() + p.get_height() * position) - - # subplots - axes = df.plot(kind='bar', width=width, position=position, subplots=True) - for ax in axes: - p = ax.patches[0] - self.assertEqual(ax.xaxis.get_ticklocs()[0], - p.get_x() + p.get_width() * position) - - # horizontal subplots - axes = df.plot(kind='barh', width=width, position=position, subplots=True) - for ax in axes: - p = ax.patches[0] - self.assertEqual(ax.yaxis.get_ticklocs()[0], - p.get_y() + p.get_height() * position) + self._check_bar_alignment(df, kind='bar', stacked=False, width=0.9, position=0.2) + self._check_bar_alignment(df, kind='bar', stacked=True, width=0.9, position=0.2) + self._check_bar_alignment(df, kind='barh', stacked=False, width=0.9, position=0.2) + self._check_bar_alignment(df, kind='barh', stacked=True, width=0.9, position=0.2) + 
self._check_bar_alignment(df, kind='bar', subplots=True, width=0.9, position=0.2) + self._check_bar_alignment(df, kind='barh', subplots=True, width=0.9, position=0.2) @slow def test_plot_scatter(self): @@ -692,68 +657,144 @@ def test_plot_bar(self): df = DataFrame({'a': [0, 1], 'b': [1, 0]}) _check_plot_works(df.plot, kind='bar') + def _check_bar_alignment(self, df, kind='bar', stacked=False, + subplots=False, align='center', + width=0.5, position=0.5): + + axes = df.plot(kind=kind, stacked=stacked, subplots=subplots, + align=align, width=width, position=position, + grid=True) + + tick_pos = np.arange(len(df)) + + if not isinstance(axes, np.ndarray): + axes = [axes] + + for ax in axes: + if kind == 'bar': + axis = ax.xaxis + ax_min, ax_max = ax.get_xlim() + elif kind == 'barh': + axis = ax.yaxis + ax_min, ax_max = ax.get_ylim() + else: + raise ValueError + + p = ax.patches[0] + if kind == 'bar' and (stacked is True or subplots is True): + edge = p.get_x() + center = edge + p.get_width() * position + tickoffset = width * position + elif kind == 'bar' and stacked is False: + center = p.get_x() + p.get_width() * len(df.columns) * position + edge = p.get_x() + if align == 'edge': + tickoffset = width * (position - 0.5) + p.get_width() * 1.5 + else: + tickoffset = width * position + p.get_width() + elif kind == 'barh' and (stacked is True or subplots is True): + center = p.get_y() + p.get_height() * position + edge = p.get_y() + tickoffset = width * position + elif kind == 'barh' and stacked is False: + center = p.get_y() + p.get_height() * len(df.columns) * position + edge = p.get_y() + if align == 'edge': + tickoffset = width * (position - 0.5) + p.get_height() * 1.5 + else: + tickoffset = width * position + p.get_height() + else: + raise ValueError + + # Check the ticks locates on integer + self.assertTrue((axis.get_ticklocs() == np.arange(len(df))).all()) + + if align == 'center': + # Check whether the bar locates on center + 
self.assertAlmostEqual(axis.get_ticklocs()[0], center) + elif align == 'edge': + # Check whether the bar's edge starts from the tick + self.assertAlmostEqual(axis.get_ticklocs()[0], edge) + else: + raise ValueError + + # Check starting point and axes limit margin + self.assertEqual(ax_min, tick_pos[0] - tickoffset - 0.25) + self.assertEqual(ax_max, tick_pos[-1] - tickoffset + 1) + # Check tick locations and axes limit margin + t_min = axis.get_ticklocs()[0] - tickoffset + t_max = axis.get_ticklocs()[-1] - tickoffset + self.assertAlmostEqual(ax_min, t_min - 0.25) + self.assertAlmostEqual(ax_max, t_max + 1.0) + return axes + + @slow def test_bar_stacked_center(self): # GH2157 df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) - ax = df.plot(kind='bar', stacked='True', grid=True) - self.assertEqual(ax.xaxis.get_ticklocs()[0], - ax.patches[0].get_x() + ax.patches[0].get_width() / 2) + axes = self._check_bar_alignment(df, kind='bar', stacked=True) + # Check the axes has the same drawing range before fixing # GH4525 + self.assertEqual(axes[0].get_xlim(), (-0.5, 4.75)) - ax = df.plot(kind='bar', stacked='True', width=0.9, grid=True) - self.assertEqual(ax.xaxis.get_ticklocs()[0], - ax.patches[0].get_x() + ax.patches[0].get_width() / 2) + self._check_bar_alignment(df, kind='bar', stacked=True, width=0.9) - ax = df.plot(kind='barh', stacked='True', grid=True) - self.assertEqual(ax.yaxis.get_ticklocs()[0], - ax.patches[0].get_y() + ax.patches[0].get_height() / 2) + axes = self._check_bar_alignment(df, kind='barh', stacked=True) + self.assertEqual(axes[0].get_ylim(), (-0.5, 4.75)) - ax = df.plot(kind='barh', stacked='True', width=0.9, grid=True) - self.assertEqual(ax.yaxis.get_ticklocs()[0], - ax.patches[0].get_y() + ax.patches[0].get_height() / 2) + self._check_bar_alignment(df, kind='barh', stacked=True, width=0.9) + @slow def test_bar_center(self): df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) - ax = df.plot(kind='bar', grid=True) - 
self.assertEqual(ax.xaxis.get_ticklocs()[0], - ax.patches[0].get_x() + ax.patches[0].get_width()) - - ax = df.plot(kind='bar', width=0.9, grid=True) - self.assertEqual(ax.xaxis.get_ticklocs()[0], - ax.patches[0].get_x() + ax.patches[0].get_width()) + axes = self._check_bar_alignment(df, kind='bar', stacked=False) + self.assertEqual(axes[0].get_xlim(), (-0.75, 4.5)) - ax = df.plot(kind='barh', grid=True) - self.assertEqual(ax.yaxis.get_ticklocs()[0], - ax.patches[0].get_y() + ax.patches[0].get_height()) - - ax = df.plot(kind='barh', width=0.9, grid=True) - self.assertEqual(ax.yaxis.get_ticklocs()[0], - ax.patches[0].get_y() + ax.patches[0].get_height()) + self._check_bar_alignment(df, kind='bar', stacked=False, width=0.9) + + axes = self._check_bar_alignment(df, kind='barh', stacked=False) + self.assertEqual(axes[0].get_ylim(), (-0.75, 4.5)) + self._check_bar_alignment(df, kind='barh', stacked=False, width=0.9) + + @slow def test_bar_subplots_center(self): df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) - axes = df.plot(kind='bar', grid=True, subplots=True) + axes = self._check_bar_alignment(df, kind='bar', subplots=True) for ax in axes: - for r in ax.patches: - self.assertEqual(ax.xaxis.get_ticklocs()[0], - ax.patches[0].get_x() + ax.patches[0].get_width() / 2) + self.assertEqual(ax.get_xlim(), (-0.5, 4.75)) - axes = df.plot(kind='bar', width=0.9, grid=True, subplots=True) + self._check_bar_alignment(df, kind='bar', subplots=True, width=0.9) + + axes = self._check_bar_alignment(df, kind='barh', subplots=True) for ax in axes: - for r in ax.patches: - self.assertEqual(ax.xaxis.get_ticklocs()[0], - ax.patches[0].get_x() + ax.patches[0].get_width() / 2) + self.assertEqual(ax.get_ylim(), (-0.5, 4.75)) - axes = df.plot(kind='barh', grid=True, subplots=True) - for ax in axes: - for r in ax.patches: - self.assertEqual(ax.yaxis.get_ticklocs()[0], - ax.patches[0].get_y() + ax.patches[0].get_height() / 2) + self._check_bar_alignment(df, kind='barh', 
subplots=True, width=0.9) - axes = df.plot(kind='barh', width=0.9, grid=True, subplots=True) - for ax in axes: - for r in ax.patches: - self.assertEqual(ax.yaxis.get_ticklocs()[0], - ax.patches[0].get_y() + ax.patches[0].get_height() / 2) + @slow + def test_bar_edge(self): + df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) + + self._check_bar_alignment(df, kind='bar', stacked=True, align='edge') + self._check_bar_alignment(df, kind='bar', stacked=True, + width=0.9, align='edge') + self._check_bar_alignment(df, kind='barh', stacked=True, align='edge') + self._check_bar_alignment(df, kind='barh', stacked=True, + width=0.9, align='edge') + + self._check_bar_alignment(df, kind='bar', stacked=False, align='edge') + self._check_bar_alignment(df, kind='bar', stacked=False, + width=0.9, align='edge') + self._check_bar_alignment(df, kind='barh', stacked=False, align='edge') + self._check_bar_alignment(df, kind='barh', stacked=False, + width=0.9, align='edge') + + self._check_bar_alignment(df, kind='bar', subplots=True, align='edge') + self._check_bar_alignment(df, kind='bar', subplots=True, + width=0.9, align='edge') + self._check_bar_alignment(df, kind='barh', subplots=True, align='edge') + self._check_bar_alignment(df, kind='barh', subplots=True, + width=0.9, align='edge') @slow def test_bar_log_no_subplots(self): diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 8ad17fd593871..2b73ae77970bf 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -1673,18 +1673,26 @@ def __init__(self, data, **kwargs): self.mark_right = kwargs.pop('mark_right', True) self.stacked = kwargs.pop('stacked', False) - self.bar_width = kwargs.pop('width', 0.5) + self.bar_width = kwargs.pop('width', 0.5) pos = kwargs.pop('position', 0.5) - self.ax_pos = np.arange(len(data)) + self.bar_width * pos + kwargs['align'] = kwargs.pop('align', 'center') + self.tick_pos = np.arange(len(data)) + self.log = kwargs.pop('log',False) MPLPlot.__init__(self, 
data, **kwargs) if self.stacked or self.subplots: - self.tickoffset = self.bar_width * pos + self.tickoffset = self.bar_width * pos + elif kwargs['align'] == 'edge': + K = self.nseries + w = self.bar_width / K + self.tickoffset = self.bar_width * (pos - 0.5) + w * 1.5 else: K = self.nseries - self.tickoffset = self.bar_width * pos + self.bar_width / K + w = self.bar_width / K + self.tickoffset = self.bar_width * pos + w + self.ax_pos = self.tick_pos - self.tickoffset def _args_adjust(self): if self.rot is None: @@ -1751,19 +1759,21 @@ def _make_plot(self): start = 0 if mpl_le_1_2_1 else None if self.subplots: - rect = bar_f(ax, self.ax_pos, y, self.bar_width, + w = self.bar_width / 2 + rect = bar_f(ax, self.ax_pos + w, y, self.bar_width, start=start, **kwds) ax.set_title(label) elif self.stacked: mask = y > 0 start = np.where(mask, pos_prior, neg_prior) - rect = bar_f(ax, self.ax_pos, y, self.bar_width, start=start, - label=label, **kwds) + w = self.bar_width / 2 + rect = bar_f(ax, self.ax_pos + w, y, self.bar_width, + start=start, label=label, **kwds) pos_prior = pos_prior + np.where(mask, y, 0) neg_prior = neg_prior + np.where(mask, 0, y) else: w = self.bar_width / K - rect = bar_f(ax, self.ax_pos + (i + 1) * w, y, w, + rect = bar_f(ax, self.ax_pos + (i + 1.5) * w, y, w, start=start, label=label, **kwds) rects.append(rect) if self.mark_right: @@ -1789,22 +1799,24 @@ def _post_plot_logic(self): name = self._get_index_name() if self.kind == 'bar': ax.set_xlim([self.ax_pos[0] - 0.25, self.ax_pos[-1] + 1]) - ax.set_xticks(self.ax_pos + self.tickoffset) + ax.set_xticks(self.tick_pos) ax.set_xticklabels(str_index, rotation=self.rot, fontsize=self.fontsize) if not self.log: # GH3254+ ax.axhline(0, color='k', linestyle='--') if name is not None: ax.set_xlabel(name) - else: + elif self.kind == 'barh': # horizontal bars ax.set_ylim([self.ax_pos[0] - 0.25, self.ax_pos[-1] + 1]) - ax.set_yticks(self.ax_pos + self.tickoffset) + ax.set_yticks(self.tick_pos) 
ax.set_yticklabels(str_index, rotation=self.rot, fontsize=self.fontsize) ax.axvline(0, color='k', linestyle='--') if name is not None: ax.set_ylabel(name) + else: + raise NotImplementedError(self.kind) # if self.subplots and self.legend: # self.axes[0].legend(loc='best')