diff --git a/ci/doctests.sh b/ci/doctests.sh index e7fe80e60eb6d..48774a1e4d00d 100755 --- a/ci/doctests.sh +++ b/ci/doctests.sh @@ -21,7 +21,7 @@ if [ "$DOCTEST" ]; then # DataFrame / Series docstrings pytest --doctest-modules -v pandas/core/frame.py \ - -k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata" + -k"-axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata" if [ $? -ne "0" ]; then RET=1 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0099f705fe1e1..81d5c112885ec 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3280,7 +3280,7 @@ def assign(self, **kwargs): Parameters ---------- - kwargs : keyword, value pairs + **kwargs : dict of {str: callable or Series} The column names are keywords. If the values are callable, they are computed on the DataFrame and assigned to the new columns. The callable must not @@ -3290,7 +3290,7 @@ def assign(self, **kwargs): Returns ------- - df : DataFrame + DataFrame A new DataFrame with the new columns in addition to all the existing columns. @@ -3310,48 +3310,34 @@ def assign(self, **kwargs): Examples -------- - >>> df = pd.DataFrame({'A': range(1, 11), 'B': np.random.randn(10)}) + >>> df = pd.DataFrame({'temp_c': [17.0, 25.0]}, + ... index=['Portland', 'Berkeley']) + >>> df + temp_c + Portland 17.0 + Berkeley 25.0 Where the value is a callable, evaluated on `df`: - - >>> df.assign(ln_A = lambda x: np.log(x.A)) - A B ln_A - 0 1 0.426905 0.000000 - 1 2 -0.780949 0.693147 - 2 3 -0.418711 1.098612 - 3 4 -0.269708 1.386294 - 4 5 -0.274002 1.609438 - 5 6 -0.500792 1.791759 - 6 7 1.649697 1.945910 - 7 8 -1.495604 2.079442 - 8 9 0.549296 2.197225 - 9 10 -0.758542 2.302585 - - Where the value already exists and is inserted: - - >>> newcol = np.log(df['A']) - >>> df.assign(ln_A=newcol) - A B ln_A - 0 1 0.426905 0.000000 - 1 2 -0.780949 0.693147 - 2 3 -0.418711 1.098612 - 3 4 -0.269708 1.386294 - 4 5 -0.274002 1.609438 - 5 6 -0.500792 1.791759 - 6 7 1.649697 1.945910 - 7 8 -1.495604 2.079442 - 8 9 0.549296 2.197225 - 9 10 -0.758542 2.302585 - - Where the keyword arguments depend on each other - - >>> df = pd.DataFrame({'A': [1, 2, 3]}) - - >>> df.assign(B=df.A, C=lambda x:x['A']+ x['B']) - A B C - 0 1 1 2 - 1 2 2 4 - 2 3 3 6 + >>> df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32) + temp_c temp_f + Portland 17.0 62.6 + Berkeley 25.0 77.0 + + Alternatively, the same behavior can be achieved by directly + referencing an existing Series or sequence: + >>> df.assign(temp_f=df['temp_c'] * 9 / 5 + 32) + temp_c temp_f + Portland 17.0 62.6 + Berkeley 25.0 77.0 + + In Python 3.6+, you can create multiple columns within the same assign + where one of the columns depends on another one defined within the same + assign: + >>> df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32, + ... temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9) + temp_c temp_f temp_k + Portland 17.0 62.6 290.15 + Berkeley 25.0 77.0 298.15 """ data = self.copy()