
时间:2017-03-31 17:27:18

标签: r data.table


目标:1)按 Client_ID 分组;2)统计每个客户的全部索赔号(ClaimNumber)数量(无论是否与 DS 服务相关);
3)仅统计与 DS 服务相关的索赔号数量;4)仅对 DS 服务的 Retail 和 WS 金额求和;5)另外,每个索赔号只应计数一次——在数据中,同一个索赔号会针对每个服务年份和服务类型重复出现。

 # example

 # Example data: one row per client / claim / service year / service.
 # `header = TRUE` makes the first non-blank line of the text the column
 # names, which the code below relies on (Client_ID, ClaimNumber, Service, ...).
 # The call was previously left unclosed, so the snippet could not be parsed.
 ds <- read.table(text = "
 Client_ID ClaimNumber ServiceYr Service Retail WS
A00002         WC1      2012      DS    100 25
A00002         WC1      2013      DS    100 25
A00002         WC1      2014      BR     50 10
A00002         WC2      2014      BR     50 10
A00002         WC3      2014      BR     50 10
A00003         WC4      2014      BR     50 10
A00003         WC4      2015      BR     50 10
A00003         WC5      2015      BR     50 10
A00003         WC5      2016      BR     50 10
A00003         WC6      2016      DS    100 25",
                  header = TRUE)

 # Build `total`: one row per client with
 #   Total_CC - number of distinct claims (any service)
 #   DS_CC    - number of distinct claims that have at least one DS row
 #   Retail   - Retail summed over DS rows
 #   WS       - WS summed over DS rows
 # Clients with no DS rows keep NA in DS_CC / Retail / WS (left joins below).

 # Collapse to one row per (client, claim) so a claim that is repeated across
 # service years / services is counted only once.
 total_claims <- sqldf("select Client_ID,ClaimNumber from ds group 
                  by Client_ID,ClaimNumber")

 # Same collapse restricted to DS rows, carrying the DS amounts per claim.
 ds_claims <- sqldf("select Client_ID,ClaimNumber, sum(Retail) as Retail, 
   sum(WS) as WS from ds where Service='DS' group by Client_ID,ClaimNumber")

 # Count the total number of claims by client.
 # `by` is spelled out in full instead of relying on partial matching (`b=`).
 total_counts <- aggregate(total_claims$ClaimNumber,
                           by = list(total_claims$Client_ID),
                           FUN = length)
 colnames(total_counts)[1:2] <- c("Client_ID", "ClaimCount")

 # Count the number of DS claims by client.
 ds_claim_counts <- aggregate(ds_claims$ClaimNumber,
                              by = list(ds_claims$Client_ID),
                              FUN = length)
 colnames(ds_claim_counts)[1:2] <- c("Client_ID", "ClaimCount")

 # Roll the per-claim DS amounts up to one row per client. Merging the
 # per-claim table directly would emit one output row per DS claim, i.e.
 # duplicated clients and unsummed amounts once a client has several DS claims.
 ds_amounts <- aggregate(ds_claims[, c("Retail", "WS")],
                         by = list(Client_ID = ds_claims$Client_ID),
                         FUN = sum)

 # Left-join so that every client appears, including those without DS claims.
 total <- merge(total_counts, ds_claim_counts, by = "Client_ID", all.x = TRUE)
 total <- merge(total, ds_amounts, by = "Client_ID", all.x = TRUE)

 # The two ClaimCount columns arrive as ClaimCount.x / ClaimCount.y; give them
 # their final names.
 colnames(total)[2:3] <- c("Total_CC", "DS_CC")

1 个答案:

答案 0 :(得分:2)




# Single-query version: one output row per Client_ID.
#   Total_CC - count(distinct ClaimNumber) over all of the client's rows
#   DS_CC    - distinct ClaimNumber values among rows with Service = 'DS';
#              the CASE yields NULL for other rows so they are not counted
#   Retail   - sum of Retail with non-DS rows multiplied by the comparison result
#   WS       - same treatment for WS
# NOTE(review): the `* (Service = 'DS')` trick needs the comparison to evaluate
# to 0/1. That holds in SQLite, presumably the backend in use here; confirm
# before running against another database driver.
sqldf("select Client_ID,
              count(distinct ClaimNumber) Total_CC,
              count(distinct case when Service = 'DS' 
                               then ClaimNumber 
                               else NULL 
                             end) DS_CC,
              sum(Retail * (Service = 'DS')) Retail,
              sum(WS * (Service = 'DS')) WS
       from ds
       group by Client_ID")


  Client_ID Total_CC DS_CC Retail WS
1    A00002        3     1    200 50
2    A00003        3     1    100 25



# data.table version: one summary row per Client_ID.
DT <- as.data.table(ds)
DT[, {
  # Mask of the DS rows within the current client group; reused below.
  is_ds <- Service == "DS"
  .(
    Total_CC = uniqueN(ClaimNumber),        # distinct claims, any service
    DS_CC    = uniqueN(ClaimNumber[is_ds]), # distinct claims with a DS row
    Retail   = sum(Retail * is_ds),         # non-DS rows contribute 0
    WS       = sum(WS * is_ds)
  )
}, by = Client_ID]


   Client_ID Total_CC DS_CC Retail WS
1:    A00002        3     1    200 50
2:    A00003        3     1    100 25



# dplyr version: one summary row per Client_ID.
#   Total_CC - distinct claims for the client, any service
#   DS_CC    - distinct claims that have at least one DS row
#   Retail   - Retail summed over DS rows (non-DS rows are multiplied by FALSE)
#   WS       - WS summed over DS rows
# The pipeline ends at summarize(); the trailing `%>%` that used to follow it
# left the expression incomplete.
ds %>%
  group_by(Client_ID) %>%
  summarize(
    Total_CC = length(unique(ClaimNumber)),
    DS_CC = length(unique(ClaimNumber[Service == "DS"])),
    Retail = sum(Retail * (Service == "DS")),
    WS = sum(WS * (Service == "DS"))
  )


# A tibble: 2 × 5
  Client_ID Total_CC DS_CC Retail    WS
     <fctr>    <int> <int>  <int> <int>
1    A00002        3     1    200    50
2    A00003        3     1    100    25