我有一个名为 oppty_oppline_sacc 的数据框,格式如下:
opp_id$ sacc_id$
001 AAB
002 AAB
003 BBC
.. ..
我想添加一个名为 nb_opportunity(下面的代码中写作 nb_oppline)的新列,其中包含每个 sacc_id$ 对应的 opp_id$ 的数量。所以我是这样做的:
oppty_oppline_sacc['nb_oppline'] = oppty_oppline_sacc.groupby(['sacc_id$'],as_index=False)['opp_id$'].count()
但是我得到这个错误:
KeyError Traceback (most recent call last)
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2524 try:
-> 2525 return self._engine.get_loc(key)
2526 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'nb_oppline'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/internals.py in set(self, item, value, check)
3967 try:
-> 3968 loc = self.items.get_loc(item)
3969 except KeyError:
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2526 except KeyError:
-> 2527 return self._engine.get_loc(self._maybe_cast_indexer(key))
2528
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'nb_oppline'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-96-7b260546d446> in <module>()
----> 1 oppty_oppline_sacc['nb_oppline'] = oppty_oppline_sacc.groupby(['sacc_id$'],as_index=False)['opp_line_id$'].count()
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
2517 else:
2518 # set column
-> 2519 self._set_item(key, value)
2520
2521 def _setitem_slice(self, key, value):
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/frame.py in _set_item(self, key, value)
2584 self._ensure_valid_index(value)
2585 value = self._sanitize_column(key, value)
-> 2586 NDFrame._set_item(self, key, value)
2587
2588 # check if we are modifying a copy
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/generic.py in _set_item(self, key, value)
1952
1953 def _set_item(self, key, value):
-> 1954 self._data.set(key, value)
1955 self._clear_item_cache()
1956
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/internals.py in set(self, item, value, check)
3969 except KeyError:
3970 # This item wasn't present, just insert at end
-> 3971 self.insert(len(self.items), item, value)
3972 return
3973
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/internals.py in insert(self, loc, item, value, allow_duplicates)
4070
4071 block = make_block(values=value, ndim=self.ndim,
-> 4072 placement=slice(loc, loc + 1))
4073
4074 for blkno, count in _fast_count_smallints(self._blknos[loc:]):
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/internals.py in make_block(values, placement, klass, ndim, dtype, fastpath)
2955 placement=placement, dtype=dtype)
2956
-> 2957 return klass(values, ndim=ndim, fastpath=fastpath, placement=placement)
2958
2959 # TODO: flexible with index=None and/or items=None
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/internals.py in __init__(self, values, ndim, fastpath, placement, **kwargs)
2080
2081 super(ObjectBlock, self).__init__(values, ndim=ndim, fastpath=fastpath,
-> 2082 placement=placement, **kwargs)
2083
2084 @property
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/internals.py in __init__(self, values, placement, ndim, fastpath)
118 raise ValueError('Wrong number of items passed %d, placement '
119 'implies %d' % (len(self.values),
--> 120 len(self.mgr_locs)))
121
122 @property
ValueError: Wrong number of items passed 2, placement implies 1
能帮我解决这个问题吗?
谢谢
答案 0 :(得分:0)
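正如 @jezrael 所说,应该使用 transform。原来的写法之所以报错,是因为 groupby(..., as_index=False)[...].count() 返回的是一个包含两列(sacc_id$ 和计数)的 DataFrame,并且每个分组只有一行,无法赋值给单独的一列,于是出现 "Wrong number of items passed 2, placement implies 1"。
transform 方法会保留原始索引,返回与原数据框行数相同的结果,因此可以直接作为新列添加到数据框中。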
df['nb_oppline'] = df.groupby('sacc_id$')['opp_id$'].transform('count')
输出
opp_id$ sacc_id$ nb_oppline
0 1 AAB 2
1 2 AAB 2
2 3 BBC 1