按组将最大值添加到df中

时间:2019-12-01 01:05:39

标签: r

我试图首先通过某些分组norm.knnN$gearbox[norm.knnN$gearbox=="automatic"] = 1 norm.knnN$gearbox[norm.knnN$gearbox=="manual"] = 0 norm.knnN$gearbox = as.numeric(norm.knnN$gearbox) norm.knnN$bus= ifelse(norm.knnN$vehicleType=="bus",1,0) norm.knnN$cabrio= ifelse(norm.knnN$vehicleType=="cabrio",1,0) norm.knnN$coupe= ifelse(norm.knnN$vehicleType=="coupe",1,0) norm.knnN$limousine= ifelse(norm.knnN$vehicleType=="limousine",1,0) norm.knnN$otherCar= ifelse(norm.knnN$vehicleType=="other",1,0) norm.knnN$small_car= ifelse(norm.knnN$vehicleType=="small_car",1,0) norm.knnN$station_wagon= ifelse(norm.knnN$vehicleType=="station_wagon",1,0) norm.knnN$suv= ifelse(norm.knnN$vehicleType=="suv",1,0) norm.knnN$vehicleType = NULL norm.knnN$cng= ifelse(norm.knnN$fuelType=="cng",1,0) norm.knnN$diesel= ifelse(norm.knnN$fuelType=="diesel",1,0) norm.knnN$electric= ifelse(norm.knnN$fuelType=="electric",1,0) norm.knnN$hybrid= ifelse(norm.knnN$fuelType=="hybrid",1,0) norm.knnN$lpg= ifelse(norm.knnN$fuelType=="lpg",1,0) norm.knnN$otherFuel= ifelse(norm.knnN$fuelType=="other",1,0) norm.knnN$petrol= ifelse(norm.knnN$fuelType=="petrol",1,0) norm.knnN$fuelType = NULL norm.knnN$audi= ifelse(norm.knnN$brand=="audi",1,0) norm.knnN$bmw= ifelse(norm.knnN$brand=="bmw",1,0) norm.knnN$mercedes_benz= ifelse(norm.knnN$brand=="mercedes_benz",1,0) norm.knnN$opel= ifelse(norm.knnN$brand=="opel",1,0) norm.knnN$volkswagen= ifelse(norm.knnN$brand=="volkswagen",1,0) norm.knnN$brand = NULL norm.knnN$notRepairedDamage[norm.knnN$notRepairedDamage=="yes"] = 1 norm.knnN$notRepairedDamage[norm.knnN$notRepairedDamage=="no"] = 0 norm.knnN$notRepairedDamage = as.numeric(norm.knnN$notRepairedDamage) ``` 来找到最大值value,然后我需要使用该分组ID的提取值来创建一列。

类似:

id

我收到以下错误:

  

df$maxdvalue <- aggregate(value ~ id, data = df, max)
`$<-.data.frame`(`*tmp*`, maxdvalue, value = list(id = 1:1763, ...) 中的错误:替换有 1763 行,数据有 74619 行

1 个答案:

答案 0 :(得分:0)

有很多方法可以做到这一点。由于您是 R / Stack Overflow 的新手,下面列出几种我希望自己能早点知道的方法:

# Group-wise aggregation: attach each group's maximum to every row.
# (max can be swapped for whatever summary function is required)

  # Method 1 (base R): ave() yields one value per row, so the result can be
  # assigned straight into a new column.

  df[["maxmpg"]] <- ave(df[["mpg"]], df[["car_brand"]], FUN = max)

  # Method 2 (base R): transform() evaluates ave() against the columns of df
  # and hands back the whole data frame with the extra column.

  df <- transform(
    df,
    maxmpg = ave(mpg, car_brand, FUN = max)
  )

  # Method 3 (base R): with() lets ave() refer to the columns by bare name;
  # the resulting vector is assigned as a column.

  df[["maxmpg"]] <- with(
    df,
    ave(mpg, car_brand, FUN = max)
  )

  # Method 4 (base R): cbind() appends the ave() result as a new column and
  # the combined data frame is assigned back.

  df <- cbind(
    df,
    maxmpg = ave(df[["mpg"]], df[["car_brand"]], FUN = max)
  )

  # 5th method using tapply, assigned as vector:
  # tapply() returns ONE value per group (named by group), not one per row,
  # so assigning it directly to a column fails with a "replacement has ...
  # rows" error. Look up each row's group by name to expand the result to
  # row length; as.vector() drops the names/dim left over from the lookup.

  df$maxmpg <- as.vector(
    tapply(df$mpg, df$car_brand, max)[as.character(df$car_brand)]
  )

  # Method 6 (base R): split the data frame into one piece per car_brand,
  # add that piece's maximum mpg as a column, then stack the pieces back
  # into a single data frame with rbind (assigned as df):

  df <- do.call(
    rbind,
    lapply(split(df, df$car_brand), function(brand_rows) {
      brand_rows$maxmpg <- max(brand_rows$mpg)
      brand_rows
    })
  )

  # Method 7 (base R): aggregate() computes one maximum per car_brand, the
  # summary's columns are renamed, and merge() joins it back onto every row
  # of df (all.x = TRUE keeps all rows of df); assigned as df:

  df <- merge(
    x = df,
    y = setNames(
      aggregate(mpg ~ car_brand, data = df, FUN = max),
      c("car_brand", "max_mpg")
    ),
    by = "car_brand",
    all.x = TRUE
  )


  # Using packages:

  # Create a vector of the required packages:

  necessary_packages <- c("dplyr", "data.table")

  # Create a vector containing the names of any packages requiring
  # installation. requireNamespace() checks a single package directly,
  # which avoids building the full installed.packages() table:

  new_packages <- necessary_packages[
    !vapply(necessary_packages, requireNamespace, logical(1), quietly = TRUE)
  ]

  # If the vector has more than 0 elements, install the new packages
  # (and their associated dependencies):

  if (length(new_packages) > 0) {

    install.packages(new_packages, dependencies = TRUE)

  }

  # Attach the packages in the session. library() stops with an error when a
  # package is unavailable, whereas require() would only return FALSE and let
  # the script fail later; invisible() suppresses the printed list:

  invisible(lapply(necessary_packages,

                   library,

                   character.only = TRUE))

  # Method 8 (dplyr): group the rows by car_brand, add each group's maximum
  # mpg as a column, then drop the grouping again. The whole result is
  # assigned back to df (a data frame, not a vector):

  df <- ungroup(
    mutate(
      group_by(df, car_brand),
      maxmpg = max(mpg)
    )
  )

# Method 9 (data.table): convert df to a data.table, then use := to add the
# per-brand maximum as a new column of dt in place (no reassignment needed):

dt <- data.table(df)

dt[, `:=`(maxmpg = max(mpg)), by = "car_brand"]

使用的数据:

# Example data built from mtcars: the row names (full car names) become the
# car_type column, and the text before the first space becomes car_brand.
# row.names = NULL replaces the car-name row names with default numbering.
df <- data.frame(
  car_type = rownames(mtcars),
  car_brand = sub(" .*", "", rownames(mtcars)),
  mtcars,
  row.names = NULL
)