R-将列转换为单因子水平

时间:2019-04-23 10:52:06

标签: r data-structures

我遇到了下面这个问题。我做了一项测试,其中有一组样本("Sample"),不同的类别("Individual_class")分别给每个样本打分("Score_individual_class"),此外我还为每个样本给出了一个最终分数("Final_Score_Sample")。下面是一个示例:

# Example data: two class scores (A and B) per sample, plus the per-sample
# final score, which is repeated on both rows of each sample.
df <- data.frame(
  Sample = rep(c(1, 2, 3), each = 2),
  Individual_class = rep(c("A", "B"), times = 3),
  Score_individual_class = c(10, 20, 50, 80, 30, 60),
  Final_Score_Sample = rep(c(80, 90, 120), each = 2)
)
  Sample       Individual_class   Score_individual_class  Final_Score_Sample
      1                A                     10                 80
      1                B                     20                 80
      2                A                     50                 90
      2                B                     80                 90
      3                A                     30                120
      3                B                     60                120

现在,我希望把 Final_Score_Sample 作为 Individual_class = "C" 的新行并入 Score_individual_class 列,得到如下结果:

  Sample       Individual_class    Score_individual_class
      1                A                     10
      1                B                     20
      1                C                     80
      2                A                     50
      2                B                     80
      2                C                     90
      3                A                     30
      3                B                     60
      3                C                    120

我已经尝试过了:

# Attempt: build one "C" row per sample from the final score, then combine
# it with the original data.

# One row per (Sample, Final_Score_Sample) pair
df_test <- unique(df[c("Sample", "Final_Score_Sample")])

# Label these rows as class "C" and store the label as a factor
df_test$Individual_class <- "C"
df_test$Individual_class <- factor(df_test$Individual_class)

# Give the final score the same column name as the per-class scores
final_col <- which(colnames(df_test) == "Final_Score_Sample")
colnames(df_test)[final_col] <- "Score_individual_class"

# No `by` is given, so the join uses every column name the two tables share
# (Sample, Individual_class, Score_individual_class). A left join only keeps
# the rows of `df`, so the "C" rows of `df_test` are never appended.
df_final <- left_join(x = df, y = df_test)

但这并没有得到我想要的结果。有人可以帮我吗?

4 个答案:

答案 0 :(得分:2)

在基础R中实现:

# Your data
df <- data.frame(
  Sample = c(1, 1, 2, 2, 3, 3),
  Individual_class = c("A", "B", "A", "B", "A", "B"),
  Score_individual_class = c(10, 20, 50, 80, 30, 60),
  Final_Score_Sample = c(80, 80, 90, 90, 120, 120)
)

# Rows that are already in the target (long) format. Columns are picked by
# name rather than by position, so reordering the columns of `df` cannot
# silently select the wrong ones.
df1 <- df[, c("Sample", "Individual_class", "Score_individual_class")]

# One row per sample holding its final score, relabelled as class "C"
df2 <- unique(df[, c("Sample", "Final_Score_Sample")])
df2$Individual_class <- "C"
names(df2)[names(df2) == "Final_Score_Sample"] <- "Score_individual_class"

# Stack both parts; rbind() on data frames matches columns by name, so the
# different column order of df2 does not matter
df <- rbind(df1, df2)

# Sort by sample, then by class, so each "C" row follows its sample's A/B rows
df <- df[order(df$Sample, df$Individual_class), ]

# Result
  Sample Individual_class Score_individual_class
1      1                A                     10
2      1                B                     20
7      1                C                     80
3      2                A                     50
4      2                B                     80
8      2                C                     90
5      3                A                     30
6      3                B                     60
9      3                C                    120

答案 1 :(得分:1)

用以下代码替换代码的最后一行:

# Stack the first three columns of df on top of the "C" rows held in df_test
df <- rbind(df[, 1:3], df_test)

然后对df进行排序:

# Order the rows by sample first, then by class within each sample
df <- df[with(df, order(Sample, Individual_class)), ]

答案 2 :(得分:1)

结果

p = t; // p = &t[0]
p += 2; // p = &t[2]
p += p[-1]; // p += 2; // p = &t[4]

结果

  Sample Individual_class Score_individual_class
1      1                A                     10
2      1                B                     20
3      1                C                     80
4      2                A                     50
5      2                B                     80
6      2                C                     90
7      3                A                     30
8      3                B                     60
9      3                C                    120

代码

library(dplyr)

# Keep the long-format columns, then append one "C" row per sample that
# carries the sample's final score. Columns are referenced by name so the
# pipeline does not depend on the column order of `df`.
df %>%
  select(Sample, Individual_class, Score_individual_class) %>%
  bind_rows(
    df %>%
      group_by(Sample) %>%
      # The final score is repeated within a sample; keep the first one
      summarize(Score_individual_class = first(Final_Score_Sample)) %>%
      mutate(Individual_class = "C")
  ) %>%
  # Sort on both keys so each "C" row is explicitly placed after A and B
  arrange(Sample, Individual_class)

答案 3 :(得分:0)

尝试一下:

     library(tidyverse)

     # Build the long table: the existing per-class scores, plus one extra
     # row per sample that carries the final score as class "C".
     df <- bind_rows(
       # Rows that already have the target shape
       df %>%
         dplyr::select(Sample, Individual_class, Score_individual_class),
       # Final_Score_Sample is repeated on every row of a sample, so reduce
       # it to one row per sample first; otherwise every "C" row would be
       # appended once per original class row.
       df %>%
         dplyr::distinct(Sample, Final_Score_Sample) %>%
         mutate(Individual_class = "C") %>%
         rename(Score_individual_class = Final_Score_Sample)
     ) %>%
       # Sort on both keys so each "C" row follows its sample's A/B rows
       arrange(Sample, Individual_class)