我有两个数据集,想把它们合并在一起。第一个数据集按行存放每个测试对象的信息,每行有一个唯一ID。第二个数据集按列存放每个测试对象的测量值,但每个对象测量了两次,因此列名是唯一ID加上后缀A或B(例如 Tree001A 和 Tree001B)。我想找到一种方法,根据唯一ID合并这两个表,而不管该列是测量A还是测量B。
这是2个数据集的一小部分样本,以及预期输出的表格。任何帮助将不胜感激!
UniqueID Site State Age Height
Tree001 FK OR 23 70
Tree002 FK OR 45 53
Tree003 NM OR 35 84
UniqueID Tree001A Tree001B Tree002A Tree002B Tree003A Tree003B
1996 4 NA 2 NA NA NA
1997 7 8 7 NA 3 NA
1998 3 2 9 4 7 NA
1999 11 9 2 12 3 13
2010 8 8 4 6 11 4
2011 10 5 6 3 8 9
UniqueID Tree001A Tree001B Tree002A Tree002B Tree003A Tree003B
Site FK FK FK FK NM NM
State OR OR OR OR OR OR
Age 23 23 45 45 35 35
Height 70 70 53 53 84 84
1996 4 NA 2 NA NA NA
1997 7 8 7 NA 3 NA
1998 3 2 9 4 7 NA
1999 11 9 2 12 3 13
2010 8 8 4 6 11 4
2011 10 5 6 3 8 9
答案 0 :(得分:1)
下面是一种可行的方法。
# Subject-level information: one row per test subject, keyed by UniqueID.
# Text columns are stored as factors with explicitly ordered levels.
df1 <- data.frame(
  UniqueID = factor(
    c("Tree001", "Tree002", "Tree003"),
    levels = c("Tree001", "Tree002", "Tree003")
  ),
  Site = factor(c("FK", "FK", "NM"), levels = c("FK", "NM")),
  State = factor(c("OR", "OR", "OR"), levels = "OR"),
  Age = c(23L, 45L, 35L),
  Height = c(70L, 53L, 84L)
)
# Measurements: one row per year (held in the UniqueID column) and one
# column per subject/replicate pair. NA marks years with no measurement.
df2 <- data.frame(
  UniqueID = c(1996L, 1997L, 1998L, 1999L, 2010L, 2011L),
  Tree001A = c(4L, 7L, 3L, 11L, 8L, 10L),
  Tree001B = c(NA, 8L, 2L, 9L, 8L, 5L),
  Tree002A = c(2L, 7L, 9L, 2L, 4L, 6L),
  Tree002B = c(NA, NA, 4L, 12L, 6L, 3L),
  Tree003A = c(NA, 3L, 7L, 3L, 11L, 8L),
  Tree003B = c(NA, NA, NA, 13L, 4L, 9L)
)
> df1
UniqueID Site State Age Height
1 Tree001 FK OR 23 70
2 Tree002 FK OR 45 53
3 Tree003 NM OR 35 84
> df2
UniqueID Tree001A Tree001B Tree002A Tree002B Tree003A Tree003B
1 1996 4 <NA> 2 <NA> <NA> <NA>
2 1997 7 8 7 <NA> 3 <NA>
3 1998 3 2 9 4 7 <NA>
4 1999 11 9 2 12 3 13
5 2010 8 8 4 6 11 4
6 2011 10 5 6 3 8 9
# Stack the subject information from df1 (one row per attribute) on top of
# the measurements in df2, repeating each subject's information under every
# one of its measurement columns.

# Every df2 column after the first is a subject ID followed by a one-letter
# replicate suffix (A or B); strip the suffix to recover the key used in df1.
id_col <- names(df2)[1]
measure_cols <- names(df2)[-1]
subject_ids <- sub("[AB]$", "", measure_cols)
subject_rows <- match(subject_ids, as.character(df1[[1]]))
if (anyNA(subject_rows)) {
  stop(
    "No subject information in df1 for: ",
    paste(measure_cols[is.na(subject_rows)], collapse = ", "),
    call. = FALSE
  )
}

# Convert column by column with as.character() before transposing, so factor
# labels are kept and numeric values are not space-padded by format().
df1_chr <- df1[subject_rows, -1, drop = FALSE]
df1_chr[] <- lapply(df1_chr, as.character)
info <- t(as.matrix(df1_chr))
colnames(info) <- measure_cols

# The attribute names become the first column; row names are reset to 1..n
df3 <- data.frame(rownames(info), info,
  stringsAsFactors = FALSE, check.names = FALSE, row.names = NULL
)
names(df3)[1] <- id_col

# Store the measurements as text too, so both tables share one column type
df2[] <- lapply(df2, as.character)

# Now combine two data frames
new <- rbind(df3, df2)
> new
UniqueID Tree001A Tree001B Tree002A Tree002B Tree003A Tree003B
1 Site FK FK FK FK NM NM
2 State OR OR OR OR OR OR
3 Age 23 23 45 45 35 35
4 Height 70 70 53 53 84 84
5 1996 4 <NA> 2 <NA> <NA> <NA>
6 1997 7 8 7 <NA> 3 <NA>
7 1998 3 2 9 4 7 <NA>
8 1999 11 9 2 12 3 13
9 2010 8 8 4 6 11 4
10 2011 10 5 6 3 8 9