在数据框中添加一个包含分组行数的新列

时间:2019-03-14 14:28:41

标签: python pandas pandas-groupby

我有一个名为oppty_oppline_sacc的数据框,格式如下:

opp_id$     sacc_id$
001         AAB
002         AAB
003         BBC
..          ..  

我想添加一个名为nb_opportunity的新列(下面的代码中写作nb_oppline),其中包含每个sacc_id$对应的opp_id$的数量。所以我是这样做的:

oppty_oppline_sacc['nb_oppline'] = oppty_oppline_sacc.groupby(['sacc_id$'],as_index=False)['opp_id$'].count()

但是我得到这个错误:

KeyError                                  Traceback (most recent call last)
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2524             try:
-> 2525                 return self._engine.get_loc(key)
   2526             except KeyError:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'nb_oppline'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/internals.py in set(self, item, value, check)
   3967         try:
-> 3968             loc = self.items.get_loc(item)
   3969         except KeyError:

~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2526             except KeyError:
-> 2527                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2528 

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'nb_oppline'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-96-7b260546d446> in <module>()
----> 1 oppty_oppline_sacc['nb_oppline'] = oppty_oppline_sacc.groupby(['sacc_id$'],as_index=False)['opp_line_id$'].count()

~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
   2517         else:
   2518             # set column
-> 2519             self._set_item(key, value)
   2520 
   2521     def _setitem_slice(self, key, value):

~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/frame.py in _set_item(self, key, value)
   2584         self._ensure_valid_index(value)
   2585         value = self._sanitize_column(key, value)
-> 2586         NDFrame._set_item(self, key, value)
   2587 
   2588         # check if we are modifying a copy

~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/generic.py in _set_item(self, key, value)
   1952 
   1953     def _set_item(self, key, value):
-> 1954         self._data.set(key, value)
   1955         self._clear_item_cache()
   1956 

~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/internals.py in set(self, item, value, check)
   3969         except KeyError:
   3970             # This item wasn't present, just insert at end
-> 3971             self.insert(len(self.items), item, value)
   3972             return
   3973 

~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/internals.py in insert(self, loc, item, value, allow_duplicates)
   4070 
   4071         block = make_block(values=value, ndim=self.ndim,
-> 4072                            placement=slice(loc, loc + 1))
   4073 
   4074         for blkno, count in _fast_count_smallints(self._blknos[loc:]):

~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/internals.py in make_block(values, placement, klass, ndim, dtype, fastpath)
   2955                      placement=placement, dtype=dtype)
   2956 
-> 2957     return klass(values, ndim=ndim, fastpath=fastpath, placement=placement)
   2958 
   2959 # TODO: flexible with index=None and/or items=None

~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/internals.py in __init__(self, values, ndim, fastpath, placement, **kwargs)
   2080 
   2081         super(ObjectBlock, self).__init__(values, ndim=ndim, fastpath=fastpath,
-> 2082                                           placement=placement, **kwargs)
   2083 
   2084     @property

~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/internals.py in __init__(self, values, placement, ndim, fastpath)
    118             raise ValueError('Wrong number of items passed %d, placement '
    119                              'implies %d' % (len(self.values),
--> 120                                              len(self.mgr_locs)))
    121 
    122     @property

ValueError: Wrong number of items passed 2, placement implies 1

能帮我解决这个问题吗?

谢谢

1 个答案:

答案 0 :(得分:0)

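正如@jezrael所说:transform方法会保留原始索引,因此其结果可以直接作为新列添加到数据框中。而原来的写法中,groupby(..., as_index=False)[...].count()返回的是一个包含两列(分组键和计数)的数据框,无法赋值给单个列,所以才会报出“Wrong number of items passed 2, placement implies 1”的错误。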

df['nb_oppline'] = df.groupby('sacc_id$')['opp_id$'].transform('count')

输出

   opp_id$ sacc_id$  nb_oppline
0        1      AAB           2
1        2      AAB           2
2        3      BBC           1