Skip to content

Commit

Permalink
Merge pull request #6691 from sinhrks/bar_center_pr
Browse files Browse the repository at this point in the history
ENH/VIS: Dataframe bar plot can now handle align keyword properly
  • Loading branch information
Tom Augspurger committed Mar 26, 2014
2 parents 5999ac8 + 06557db commit 110406c
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 99 deletions.
10 changes: 8 additions & 2 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,16 @@ API Changes
the index, rather than requiring a list of tuple (:issue:`4370`)

- Fix a bug where invalid eval/query operations would blow the stack (:issue:`5198`)

- The following keywords are now accepted by :meth:`DataFrame.plot(kind='bar')` and :meth:`DataFrame.plot(kind='barh')`.
- `width`: Specify the bar width. In previous versions, static value 0.5 was passed to matplotlib and it cannot be overwritten.

- `width`: Specify the bar width. In previous versions, a static value of 0.5 was passed to matplotlib and could not be overridden. (:issue:`6604`)

- `align`: Specify the bar alignment. Default is `center` (different from matplotlib). In previous versions, pandas passed `align='edge'` to matplotlib and adjusted the location to `center` by itself, so the `align` keyword was not applied as expected. (:issue:`4525`)

- `position`: Specify the relative alignment for the bar plot layout, from 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center). (:issue:`6604`)
- Define and document the order of column vs index names in query/eval

- Define and document the order of column vs index names in query/eval
(:issue:`6676`)

Deprecations
Expand Down
8 changes: 7 additions & 1 deletion doc/source/v0.14.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,15 @@ These are out-of-bounds selections
df_multi.set_index([df_multi.index, df_multi.index])

- The following keywords are now accepted by :meth:`DataFrame.plot(kind='bar')` and :meth:`DataFrame.plot(kind='barh')`.
- `width`: Specify the bar width. In previous versions, static value 0.5 was passed to matplotlib and it cannot be overwritten.

- `width`: Specify the bar width. In previous versions, a static value of 0.5 was passed to matplotlib and could not be overridden. (:issue:`6604`)

- `align`: Specify the bar alignment. Default is `center` (different from matplotlib). In previous versions, pandas passed `align='edge'` to matplotlib and adjusted the location to `center` by itself, so the `align` keyword was not applied as expected. (:issue:`4525`)

- `position`: Specify the relative alignment for the bar plot layout, from 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center). (:issue:`6604`)

Because the default `align` value has changed, the coordinates of bar plots are now located on integer values (0.0, 1.0, 2.0 ...). This is intended to place bar plots on the same coordinates as line plots. However, a bar plot may differ unexpectedly if you manually adjust the bar location or drawing area, for example with `set_xlim` or `set_ylim`. In these cases, please modify your script to match the new coordinates.


MultiIndexing Using Slicers
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
211 changes: 126 additions & 85 deletions pandas/tests/test_graphics.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,47 +615,12 @@ def test_bar_barwidth(self):
@slow
def test_bar_barwidth_position(self):
df = DataFrame(randn(5, 5))

width = 0.9
position = 0.2

# regular
ax = df.plot(kind='bar', width=width, position=position)
p = ax.patches[0]
self.assertEqual(ax.xaxis.get_ticklocs()[0],
p.get_x() + p.get_width() * position * len(df.columns))

# stacked
ax = df.plot(kind='bar', stacked=True, width=width, position=position)
p = ax.patches[0]
self.assertEqual(ax.xaxis.get_ticklocs()[0],
p.get_x() + p.get_width() * position)

# horizontal regular
ax = df.plot(kind='barh', width=width, position=position)
p = ax.patches[0]
self.assertEqual(ax.yaxis.get_ticklocs()[0],
p.get_y() + p.get_height() * position * len(df.columns))

# horizontal stacked
ax = df.plot(kind='barh', stacked=True, width=width, position=position)
p = ax.patches[0]
self.assertEqual(ax.yaxis.get_ticklocs()[0],
p.get_y() + p.get_height() * position)

# subplots
axes = df.plot(kind='bar', width=width, position=position, subplots=True)
for ax in axes:
p = ax.patches[0]
self.assertEqual(ax.xaxis.get_ticklocs()[0],
p.get_x() + p.get_width() * position)

# horizontal subplots
axes = df.plot(kind='barh', width=width, position=position, subplots=True)
for ax in axes:
p = ax.patches[0]
self.assertEqual(ax.yaxis.get_ticklocs()[0],
p.get_y() + p.get_height() * position)
self._check_bar_alignment(df, kind='bar', stacked=False, width=0.9, position=0.2)
self._check_bar_alignment(df, kind='bar', stacked=True, width=0.9, position=0.2)
self._check_bar_alignment(df, kind='barh', stacked=False, width=0.9, position=0.2)
self._check_bar_alignment(df, kind='barh', stacked=True, width=0.9, position=0.2)
self._check_bar_alignment(df, kind='bar', subplots=True, width=0.9, position=0.2)
self._check_bar_alignment(df, kind='barh', subplots=True, width=0.9, position=0.2)

@slow
def test_plot_scatter(self):
Expand Down Expand Up @@ -692,68 +657,144 @@ def test_plot_bar(self):
df = DataFrame({'a': [0, 1], 'b': [1, 0]})
_check_plot_works(df.plot, kind='bar')

def _check_bar_alignment(self, df, kind='bar', stacked=False,
                         subplots=False, align='center',
                         width=0.5, position=0.5):
    """
    Plot ``df`` as a bar chart and check tick, bar and axis-limit placement.

    The frame is drawn with ``df.plot`` using the given keywords (always
    with ``grid=True``). For every axes that comes back, the first patch
    is inspected and the following is asserted:

    * the tick locations equal ``0, 1, ..., len(df) - 1``;
    * with ``align='center'`` the first tick coincides with the expected
      bar center, with ``align='edge'`` it coincides with the leading
      edge of the first patch;
    * the axis limits equal ``first_tick - tickoffset - 0.25`` and
      ``last_tick - tickoffset + 1``.

    Parameters
    ----------
    df : object passed through to ``df.plot``; ``len(df)`` and
        ``len(df.columns)`` are used to compute the expected positions
    kind : 'bar' or 'barh'
    stacked, subplots : forwarded to ``df.plot``; they also select which
        expected-position formula is used
    align : 'center' or 'edge'
    width, position : forwarded to ``df.plot`` and used to compute the
        expected tick offset

    Returns
    -------
    The axes produced by ``df.plot``; a non-ndarray result is wrapped
    in a one-element list.

    Raises
    ------
    ValueError
        If ``kind`` is neither 'bar' nor 'barh', if ``stacked`` is not
        literally ``True``/``False`` while ``subplots`` is not ``True``,
        or if ``align`` is neither 'center' nor 'edge'.
    """
    axes = df.plot(kind=kind, stacked=stacked, subplots=subplots,
                   align=align, width=width, position=position,
                   grid=True)

    # Expected tick locations: one integer per row of ``df``.
    tick_pos = np.arange(len(df))

    # Normalise so the loop below can always iterate.
    if not isinstance(axes, np.ndarray):
        axes = [axes]

    for ax in axes:
        if kind == 'bar':
            axis = ax.xaxis
            ax_min, ax_max = ax.get_xlim()
        elif kind == 'barh':
            axis = ax.yaxis
            ax_min, ax_max = ax.get_ylim()
        else:
            raise ValueError("kind must be 'bar' or 'barh', got %r"
                             % (kind,))

        # Vertical and horizontal bars share the same geometry; only the
        # patch accessors differ.
        p = ax.patches[0]
        if kind == 'bar':
            edge, thickness = p.get_x(), p.get_width()
        else:
            edge, thickness = p.get_y(), p.get_height()

        if stacked is True or subplots is True:
            # One bar per tick: the patch spans the full ``width``.
            center = edge + thickness * position
            tickoffset = width * position
        elif stacked is False:
            # Side-by-side bars: one patch per column shares the tick.
            center = edge + thickness * len(df.columns) * position
            if align == 'edge':
                tickoffset = width * (position - 0.5) + thickness * 1.5
            else:
                tickoffset = width * position + thickness
        else:
            raise ValueError("stacked must be True or False, got %r"
                             % (stacked,))

        # Check the ticks are located on integers
        ticklocs = axis.get_ticklocs()
        self.assertTrue((ticklocs == tick_pos).all())

        if align == 'center':
            # Check whether the bar is located on the center
            self.assertAlmostEqual(ticklocs[0], center)
        elif align == 'edge':
            # Check whether the bar's edge starts from the tick
            self.assertAlmostEqual(ticklocs[0], edge)
        else:
            raise ValueError("align must be 'center' or 'edge', got %r"
                             % (align,))

        # Check the axes limit margins relative to the first/last tick.
        # The limits are computed floats, so compare approximately
        # instead of with exact equality.
        self.assertAlmostEqual(ax_min, ticklocs[0] - tickoffset - 0.25)
        self.assertAlmostEqual(ax_max, ticklocs[-1] - tickoffset + 1.0)
    return axes

@slow
def test_bar_stacked_center(self):
# GH2157
df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5))
ax = df.plot(kind='bar', stacked='True', grid=True)
self.assertEqual(ax.xaxis.get_ticklocs()[0],
ax.patches[0].get_x() + ax.patches[0].get_width() / 2)
axes = self._check_bar_alignment(df, kind='bar', stacked=True)
# Check the axes has the same drawing range before fixing # GH4525
self.assertEqual(axes[0].get_xlim(), (-0.5, 4.75))

ax = df.plot(kind='bar', stacked='True', width=0.9, grid=True)
self.assertEqual(ax.xaxis.get_ticklocs()[0],
ax.patches[0].get_x() + ax.patches[0].get_width() / 2)
self._check_bar_alignment(df, kind='bar', stacked=True, width=0.9)

ax = df.plot(kind='barh', stacked='True', grid=True)
self.assertEqual(ax.yaxis.get_ticklocs()[0],
ax.patches[0].get_y() + ax.patches[0].get_height() / 2)
axes = self._check_bar_alignment(df, kind='barh', stacked=True)
self.assertEqual(axes[0].get_ylim(), (-0.5, 4.75))

ax = df.plot(kind='barh', stacked='True', width=0.9, grid=True)
self.assertEqual(ax.yaxis.get_ticklocs()[0],
ax.patches[0].get_y() + ax.patches[0].get_height() / 2)
self._check_bar_alignment(df, kind='barh', stacked=True, width=0.9)

@slow
def test_bar_center(self):
df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5))
ax = df.plot(kind='bar', grid=True)
self.assertEqual(ax.xaxis.get_ticklocs()[0],
ax.patches[0].get_x() + ax.patches[0].get_width())

ax = df.plot(kind='bar', width=0.9, grid=True)
self.assertEqual(ax.xaxis.get_ticklocs()[0],
ax.patches[0].get_x() + ax.patches[0].get_width())
axes = self._check_bar_alignment(df, kind='bar', stacked=False)
self.assertEqual(axes[0].get_xlim(), (-0.75, 4.5))

ax = df.plot(kind='barh', grid=True)
self.assertEqual(ax.yaxis.get_ticklocs()[0],
ax.patches[0].get_y() + ax.patches[0].get_height())

ax = df.plot(kind='barh', width=0.9, grid=True)
self.assertEqual(ax.yaxis.get_ticklocs()[0],
ax.patches[0].get_y() + ax.patches[0].get_height())
self._check_bar_alignment(df, kind='bar', stacked=False, width=0.9)

axes = self._check_bar_alignment(df, kind='barh', stacked=False)
self.assertEqual(axes[0].get_ylim(), (-0.75, 4.5))

self._check_bar_alignment(df, kind='barh', stacked=False, width=0.9)

@slow
def test_bar_subplots_center(self):
df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5))
axes = df.plot(kind='bar', grid=True, subplots=True)
axes = self._check_bar_alignment(df, kind='bar', subplots=True)
for ax in axes:
for r in ax.patches:
self.assertEqual(ax.xaxis.get_ticklocs()[0],
ax.patches[0].get_x() + ax.patches[0].get_width() / 2)
self.assertEqual(ax.get_xlim(), (-0.5, 4.75))

axes = df.plot(kind='bar', width=0.9, grid=True, subplots=True)
self._check_bar_alignment(df, kind='bar', subplots=True, width=0.9)

axes = self._check_bar_alignment(df, kind='barh', subplots=True)
for ax in axes:
for r in ax.patches:
self.assertEqual(ax.xaxis.get_ticklocs()[0],
ax.patches[0].get_x() + ax.patches[0].get_width() / 2)
self.assertEqual(ax.get_ylim(), (-0.5, 4.75))

axes = df.plot(kind='barh', grid=True, subplots=True)
for ax in axes:
for r in ax.patches:
self.assertEqual(ax.yaxis.get_ticklocs()[0],
ax.patches[0].get_y() + ax.patches[0].get_height() / 2)
self._check_bar_alignment(df, kind='barh', subplots=True, width=0.9)

axes = df.plot(kind='barh', width=0.9, grid=True, subplots=True)
for ax in axes:
for r in ax.patches:
self.assertEqual(ax.yaxis.get_ticklocs()[0],
ax.patches[0].get_y() + ax.patches[0].get_height() / 2)
@slow
def test_bar_edge(self):
    """Check ``align='edge'`` bar plots for every layout, kind and width."""
    df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5))

    # Layouts are visited in the order stacked, side-by-side, subplots;
    # within each layout 'bar' precedes 'barh', and the helper's default
    # width is checked before the explicit 0.9 width.
    layouts = [{'stacked': True}, {'stacked': False}, {'subplots': True}]
    for layout in layouts:
        for kind in ('bar', 'barh'):
            self._check_bar_alignment(df, kind=kind, align='edge',
                                      **layout)
            self._check_bar_alignment(df, kind=kind, width=0.9,
                                      align='edge', **layout)

@slow
def test_bar_log_no_subplots(self):
Expand Down
34 changes: 23 additions & 11 deletions pandas/tools/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -1673,18 +1673,26 @@ def __init__(self, data, **kwargs):
self.mark_right = kwargs.pop('mark_right', True)
self.stacked = kwargs.pop('stacked', False)

self.bar_width = kwargs.pop('width', 0.5)
self.bar_width = kwargs.pop('width', 0.5)
pos = kwargs.pop('position', 0.5)
self.ax_pos = np.arange(len(data)) + self.bar_width * pos

kwargs['align'] = kwargs.pop('align', 'center')
self.tick_pos = np.arange(len(data))

self.log = kwargs.pop('log',False)
MPLPlot.__init__(self, data, **kwargs)

if self.stacked or self.subplots:
self.tickoffset = self.bar_width * pos
self.tickoffset = self.bar_width * pos
elif kwargs['align'] == 'edge':
K = self.nseries
w = self.bar_width / K
self.tickoffset = self.bar_width * (pos - 0.5) + w * 1.5
else:
K = self.nseries
self.tickoffset = self.bar_width * pos + self.bar_width / K
w = self.bar_width / K
self.tickoffset = self.bar_width * pos + w
self.ax_pos = self.tick_pos - self.tickoffset

def _args_adjust(self):
if self.rot is None:
Expand Down Expand Up @@ -1751,19 +1759,21 @@ def _make_plot(self):
start = 0 if mpl_le_1_2_1 else None

if self.subplots:
rect = bar_f(ax, self.ax_pos, y, self.bar_width,
w = self.bar_width / 2
rect = bar_f(ax, self.ax_pos + w, y, self.bar_width,
start=start, **kwds)
ax.set_title(label)
elif self.stacked:
mask = y > 0
start = np.where(mask, pos_prior, neg_prior)
rect = bar_f(ax, self.ax_pos, y, self.bar_width, start=start,
label=label, **kwds)
w = self.bar_width / 2
rect = bar_f(ax, self.ax_pos + w, y, self.bar_width,
start=start, label=label, **kwds)
pos_prior = pos_prior + np.where(mask, y, 0)
neg_prior = neg_prior + np.where(mask, 0, y)
else:
w = self.bar_width / K
rect = bar_f(ax, self.ax_pos + (i + 1) * w, y, w,
rect = bar_f(ax, self.ax_pos + (i + 1.5) * w, y, w,
start=start, label=label, **kwds)
rects.append(rect)
if self.mark_right:
Expand All @@ -1789,22 +1799,24 @@ def _post_plot_logic(self):
name = self._get_index_name()
if self.kind == 'bar':
ax.set_xlim([self.ax_pos[0] - 0.25, self.ax_pos[-1] + 1])
ax.set_xticks(self.ax_pos + self.tickoffset)
ax.set_xticks(self.tick_pos)
ax.set_xticklabels(str_index, rotation=self.rot,
fontsize=self.fontsize)
if not self.log: # GH3254+
ax.axhline(0, color='k', linestyle='--')
if name is not None:
ax.set_xlabel(name)
else:
elif self.kind == 'barh':
# horizontal bars
ax.set_ylim([self.ax_pos[0] - 0.25, self.ax_pos[-1] + 1])
ax.set_yticks(self.ax_pos + self.tickoffset)
ax.set_yticks(self.tick_pos)
ax.set_yticklabels(str_index, rotation=self.rot,
fontsize=self.fontsize)
ax.axvline(0, color='k', linestyle='--')
if name is not None:
ax.set_ylabel(name)
else:
raise NotImplementedError(self.kind)

# if self.subplots and self.legend:
# self.axes[0].legend(loc='best')
Expand Down

0 comments on commit 110406c

Please sign in to comment.