
时间:2019-06-14 21:02:24

标签: python pandas dataframe


    member_id  survey_score  call_reason  call_direction      time_stamp
0     bob13         0          returns       inbound      2019-03-18 10:12:00
1     ub40          5         complaint      inbound      2019-03-19 11:12:00
2     bob13         7          returns       outbound     2019-03-19 09:15:00
3     todd100       3         order_error    inbound      2019-03-20 10:15:00
4     ub40          2         complaint      inbound      2019-03-21 12:11:00
5     todd100       7         order_error    outbound     2019-03-22 08:10:00
6     ub40          1         complaint      outbound     2019-03-22 11:09:00
7     ron34         6         exchange       inbound      2019-03-22 13:09:00
8     ron34         7         returns         inbound      2019-03-24 15:03:00


    member_id    call_reason     score_differential          
0     bob13       returns               7
1     ub40       complaint             -1
2     todd100    order_error            4       





我尝试了很多方法想得到正确的输出,但仍然遇到很大的困难,感觉自己把事情弄得太复杂了。首先,我计算出每位成员各次通话的先后顺序(call_order)。然后,我新建了两列,分别用于存放首次入站通话的调查得分和得分差异。接着,我取得所有唯一的 member_id 列表以便遍历。最后,我写了一个包含大量逻辑的庞大循环,结果把自己绕晕了。


import numpy as np

# Attempted per-member scoring loop from the question, repaired so that it
# parses and only refers to names and columns that exist in the sample frame
# (`df` with a `time_stamp` column).  `df` itself is expected to be loaded
# beforehand with the columns shown in the table above.

# Number each member's calls chronologically (1 = earliest).  Dense ranking
# gives calls with identical timestamps the same order number.
df['call_order'] = df.groupby('member_id')['time_stamp'].rank(ascending=True, method='dense')

# Result columns; NaN means "nothing computed for this row".
# NOTE(review): first_call_survey_score is created here but never filled in
# by this snippet.
df["first_call_survey_score"] = np.nan
df["score_differential"] = np.nan

member_list = df['member_id'].unique()

unscorable = 0
for member in member_list:
    try:
        temp = df.loc[df['member_id'] == member]
        # Keep a single row per call_order value so each query below matches
        # at most one row.
        temp = temp.drop_duplicates(subset='call_order', keep="first")
        num_calls = temp['member_id'].count()
        first_call = temp.query("call_order == 1")
        first_survey_score = first_call['survey_score'].values[0]
        reason = first_call['call_reason'].values[0]
        sumscore = 0
        legit_call_count = 0
        # Explicit flag instead of the former `count = 20` sentinel, which
        # neither stopped the loop for members with 20 or more calls nor was
        # distinguishable from a member with exactly 19 calls.
        gave_up = False
        count = 2
        while count <= num_calls:
            next_call = temp.query("call_order == @count")
            if reason == next_call['call_reason'].values[0]:
                # Follow-up call about the same reason: accumulate its score.
                sumscore = sumscore + next_call['survey_score'].values[0]
                count = count + 1
                legit_call_count = legit_call_count + 1
            elif count == num_calls:
                # The last call has a different reason; nothing left to pair.
                gave_up = True
                break
            else:
                # Different reason: skip ahead and restart the baseline from
                # the following call.
                count = count + 1
                next_call = temp.query("call_order == @count")
                reason = next_call['call_reason'].values[0]
                first_survey_score = next_call['survey_score'].values[0]

        # Same outcome as the original branches: one matched follow-up is
        # always scored, several are scored unless the scan was abandoned,
        # and zero can never be scored (it used to surface as a division by
        # zero swallowed by the blanket `except`).
        scorable = legit_call_count == 1 or (legit_call_count > 1 and not gave_up)
        if scorable:
            df.loc[df['member_id'] == member, ['score_differential']] = sumscore / legit_call_count - first_survey_score
        else:
            unscorable = unscorable + 1
    except IndexError:
        # A call_order lookup matched no row (`.values[0]` on an empty
        # selection).  Other errors, such as a misspelled column name, now
        # propagate instead of silently marking every caller unscorable.
        unscorable = unscorable + 1

print(unscorable, "Callers could not be scored")

1 个答案:

答案 0 :(得分:0)


import io

import pandas as pd

# Sample call log.  It extends the question's data with two extra bob13 rows
# so that one member/reason pair has two inbound -> outbound cycles.
txt = """\
   member_id  survey_score  call_reason  call_direction      time_stamp
     bob13         0          returns       inbound      2019-03-18T10:12:00
     ub40          5         complaint      inbound      2019-03-19T11:12:00
     bob13         7          returns       outbound     2019-03-19T09:15:00
     todd100       3         order_error    inbound      2019-03-20T10:15:00
     ub40          2         complaint      inbound      2019-03-21T12:11:00
     todd100       7         order_error    outbound     2019-03-22T08:10:00
     ub40          1         complaint      outbound     2019-03-22T11:09:00
     ron34         6         exchange       inbound      2019-03-22T13:09:00
     ron34         7         returns         inbound      2019-03-24T15:03:00
     bob13         2          returns       inbound      2019-03-25T10:12:00
     bob13         3          returns       outbound     2019-03-27T09:15:00
"""
# Fields are separated by runs of spaces; sep=r"\s+" is the supported
# spelling of the deprecated delim_whitespace flag.
df = pd.read_csv(io.StringIO(txt), sep=r"\s+", index_col=False)

# Number the outbound calls within each (member, reason) pair in time order.
# The timestamps are ISO-8601 strings, so ranking them as text is also
# chronological.  Inbound rows are absent from this group and stay NaN.
grp = df.query('call_direction=="outbound"').\
    groupby(['member_id', 'call_reason'])
df['OutId'] = grp.time_stamp.transform(lambda x: x.rank())

# Back-fill so every inbound call carries the number of the next outbound
# call for the same member and reason.  Pairs with no outbound call at all
# (ron34 here) keep NaN.  This relies on the rows of each pair appearing in
# chronological order, as they do in the sample.
grp = df.groupby(['member_id', 'call_reason'])
df['Id'] = grp.OutId.transform(lambda x: x.bfill())

# Per cycle, take the last inbound score before the outbound call and the
# outbound call's own score.  groupby drops NaN keys, which removes the
# pairs that never received an outbound call.
inbnd_score = df.query('call_direction=="inbound"').\
    groupby(['member_id', 'call_reason', 'Id']).survey_score.last()
outbnd_score = df.query('call_direction=="outbound"').\
    groupby(['member_id', 'call_reason', 'Id']).survey_score.last()

# Put both scores side by side, indexed by (member_id, call_reason, Id).
ddf = pd.concat([inbnd_score, outbnd_score], axis=1,
                keys=['inbnd', 'outbnd'])
ddf['score_differential'] = ddf.outbnd - ddf.inbnd


   member_id  survey_score  call_reason call_direction           time_stamp  OutId
0      bob13             0      returns        inbound  2019-03-18T10:12:00    NaN
1       ub40             5    complaint        inbound  2019-03-19T11:12:00    NaN
2      bob13             7      returns       outbound  2019-03-19T09:15:00    1.0
3    todd100             3  order_error        inbound  2019-03-20T10:15:00    NaN
4       ub40             2    complaint        inbound  2019-03-21T12:11:00    NaN
5    todd100             7  order_error       outbound  2019-03-22T08:10:00    1.0
6       ub40             1    complaint       outbound  2019-03-22T11:09:00    1.0
7      ron34             6     exchange        inbound  2019-03-22T13:09:00    NaN
8      ron34             7      returns        inbound  2019-03-24T15:03:00    NaN
9      bob13             2      returns        inbound  2019-03-25T10:12:00    NaN
10     bob13             3      returns       outbound  2019-03-27T09:15:00    2.0

   member_id  survey_score  call_reason call_direction           time_stamp  OutId   Id
0      bob13             0      returns        inbound  2019-03-18T10:12:00    NaN  1.0
1       ub40             5    complaint        inbound  2019-03-19T11:12:00    NaN  1.0
2      bob13             7      returns       outbound  2019-03-19T09:15:00    1.0  1.0
3    todd100             3  order_error        inbound  2019-03-20T10:15:00    NaN  1.0
4       ub40             2    complaint        inbound  2019-03-21T12:11:00    NaN  1.0
5    todd100             7  order_error       outbound  2019-03-22T08:10:00    1.0  1.0
6       ub40             1    complaint       outbound  2019-03-22T11:09:00    1.0  1.0
7      ron34             6     exchange        inbound  2019-03-22T13:09:00    NaN  NaN
8      ron34             7      returns        inbound  2019-03-24T15:03:00    NaN  NaN
9      bob13             2      returns        inbound  2019-03-25T10:12:00    NaN  2.0
10     bob13             3      returns       outbound  2019-03-27T09:15:00    2.0  2.0

                           inbnd  outbnd  score_differential
member_id call_reason Id
bob13     returns     1.0      0       7                   7
                      2.0      2       3                   1
todd100   order_error 1.0      3       7                   4
ub40      complaint   1.0      2       1                  -1