在 dplyr 的 mutate 中按条件赋值

时间:2016-07-23 02:02:46

标签: r dplyr

问题

我想根据带条件的计算结果为数据集添加一列(mutate),同时不过滤掉原始数据集中的任何行。

示例

数据帧

# Sample transactions: nine rows with an amount (stored as text), a numeric
# operation code and a numeric user id. The first two columns are runs of
# repeated values, so they are built with rep().
df <- data.frame(
  amounts = rep(c("2.95", "14.95", "-14.95"), times = c(2, 3, 4)),
  operation_code = rep(c(100, 101, 110), times = c(2, 3, 4)),
  user_id = c(999, 111, 999, 111, 999, 111, 111, 999, 999)
)

这是我想要的 mutate 操作,但它会按 operation_code == 110 过滤数据。我希望保留 data.frame 中的所有数据,并且只为 operation_code 等于 110 的行填充 fees 列。

# The asker's attempt: the fee (20 per code-110 row of each user) is computed
# as wanted, but filter() discards every row whose operation_code is not 110,
# so the reassigned df keeps only the code-110 rows.
df <- df %>%
  filter(operation_code == 110) %>%
  group_by(user_id) %>%
  mutate(fees = 20 * n())

结果应该是这样的:

| amounts | operation_code | user_id | fees |
|---------|----------------|---------|------|
| 2.95    | 100            | 999     | NA   |
| 2.95    | 100            | 111     | NA   |
| 14.95   | 101            | 999     | NA   |
| 14.95   | 101            | 111     | NA   |
| 14.95   | 101            | 999     | NA   |
| -14.95  | 110            | 111     | 40   |
| -14.95  | 110            | 111     | 40   |
| -14.95  | 110            | 999     | 40   |
| -14.95  | 110            | 999     | 40   |

1 个答案:

答案 0 :(得分:2)

除了Hack-R的原始直接解决方案:

# library() stops with an error if dplyr is missing; require() would only
# warn and return FALSE, letting the script fail later at the first %>%.
library(dplyr)

df <- data.frame(
  amounts = c("2.95", "2.95", "14.95", "14.95", "14.95",
              "-14.95", "-14.95", "-14.95", "-14.95"),
  operation_code = c(100, 100, 101, 101, 101, 110, 110, 110, 110),
  user_id = c(999, 111, 999, 111, 999, 111, 111, 999, 999)
)

# Start with a numeric all-NA fees column so its type does not depend on the
# later assignment coercing a logical NA column.
df$fees <- NA_real_

# Rows that should receive a fee; computed once and reused on both sides.
is_fee_op <- df$operation_code == 110

# Compute the fee only on the code-110 rows (20 per such row of the same
# user) and write those rows back; all other rows keep fees = NA.
df[is_fee_op, ] <- df[is_fee_op, ] %>%
  group_by(user_id) %>%
  mutate(fees = n() * 20) %>%
  ungroup()
df
#  amounts operation_code user_id  fees
#   <fctr>          <dbl>   <dbl> <dbl>
#1    2.95            100     999    NA
#2    2.95            100     111    NA
#3   14.95            101     999    NA
#4   14.95            101     111    NA
#5   14.95            101     999    NA
#6  -14.95            110     111    40
#7  -14.95            110     111    40
#8  -14.95            110     999    40
#9  -14.95            110     999    40

获得此结果的替代方法包括:

按 'user_id' 分组后,我们可以使用 ifelse 创建 'fees' 列。

# Within each user, rows with operation_code 110 get 20 times that user's
# number of code-110 rows; every other row gets a numeric NA.
df %>%
  group_by(user_id) %>%
  mutate(
    fees = ifelse(
      operation_code == 110,
      20 * sum(operation_code == 110),
      NA_real_
    )
  )
#  amounts operation_code user_id  fees
#   <fctr>          <dbl>   <dbl> <dbl>
#1    2.95            100     999    NA
#2    2.95            100     111    NA
#3   14.95            101     999    NA
#4   14.95            101     111    NA
#5   14.95            101     999    NA
#6  -14.95            110     111    40
#7  -14.95            110     111    40
#8  -14.95            110     999    40
#9  -14.95            110     999    40

或者不使用ifelse

# Same result without ifelse: NA^0 is 1 and NA^1 is NA, so multiplying by
# NA^(operation_code != 110) keeps the fee on code-110 rows and turns it
# into NA on all other rows.
df %>%
  group_by(user_id) %>%
  mutate(
    fees = 20 * sum(operation_code == 110) * NA^(operation_code != 110)
  )

注意:以上方法都是用 dplyr 得到预期输出的。

data.table的紧凑选项是

library(data.table)

# setDT() converts df to a data.table by reference. The i-expression limits
# the update to code-110 rows, := adds fees in place, and .N is the number of
# selected rows in each user_id group; rows not selected are left as NA.
setDT(df)[
  operation_code == 110,
  fees := .N * 20,
  by = user_id
]
df
#   amounts operation_code user_id fees
#1:    2.95            100     999   NA
#2:    2.95            100     111   NA
#3:   14.95            101     999   NA
#4:   14.95            101     111   NA
#5:   14.95            101     999   NA
#6:  -14.95            110     111   40
#7:  -14.95            110     111   40
#8:  -14.95            110     999   40
#9:  -14.95            110     999   40

或者我们可以使用base R方法

# Base R: ave() applies the function to operation_code within each user_id
# group and returns a vector aligned with the original rows. Code-110 rows
# get 20 times the group's number of code-110 rows; other rows get NA.
ave(
  df$operation_code,
  df$user_id,
  FUN = function(codes) {
    ifelse(codes == 110, sum(codes == 110) * 20, NA)
  }
)
#[1] NA NA NA NA NA 40 40 40 40