我有一个嵌套列表,其中包含由 lapply 和 split 操作生成的 19 个子列表。这些子列表的格式如下:
#list1
Var col1 col2 col3
A 2 3 4
B 3 4 5
#list2
Var col1 col2 col3
A 5 6 7
B 5 4 4
......
#list19
Var col1 col2 col3
A 3 6 7
B 7 4 4
我已经能够用下面的代码合并这些列表:
merge.all <- function(x, y) merge(x, y, all=TRUE, by="Var")
# Fold the pairwise merge over every element of DataList, left to right.
out <- Reduce(f = merge.all, x = DataList)
然而,由于各个列表中其余列的名称相同,合并时我收到了错误。
如何将列表名称连接到变量名称,以便得到类似的内容:
Var list1.col1 list1.col2 list1.col3 .......... list19.col3
A 2 3 4 .......... 7
B 3 4 5 .......... 4
答案 0 :(得分:3)
我相信会有人给出更好、更优雅的解决方案。不过,如果您只想要一个快速而粗糙的办法,下面这个似乎可行。
我的计划是在合并之前简单地更改列名。
# Sample data: three data frames that share the key column `Var`.
df1 <- data.frame(
  Var = c("A", "B"), col1 = c(2, 3), col2 = c(3, 4), col3 = c(4, 5)
)
df2 <- data.frame(
  Var = c("A", "B"), col1 = c(5, 5), col2 = c(6, 4), col3 = c(7, 5)
)
df19 <- data.frame(
  Var = c("A", "B"), col1 = c(3, 7), col2 = c(6, 4), col3 = c(7, 4)
)
# Named by hand here; presumably your own list already has names.
mylist <- list(df1 = df1, df2 = df2, df19 = df19)

# Prefix every non-key column of `df` with `prefix` and a "." separator.
# Each data frame's own column names are used, so frames with differing
# columns are labelled correctly, and the key column is found by name
# rather than by position.
prefix_columns <- function(df, prefix, key = "Var") {
  is_key <- names(df) == key
  names(df)[!is_key] <- paste(prefix, names(df)[!is_key], sep = ".")
  df
}

# Map() pairs each data frame with its list name; an empty list simply
# yields an empty result.
mylist <- Map(prefix_columns, mylist, names(mylist))

# Merge: full outer join of two data frames on the key column `Var`.
merge.all <- function(x, y) {
  merge(x, y, all = TRUE, by = "Var")
}
out <- Reduce(merge.all, mylist)
out
#   Var df1.col1 df1.col2 df1.col3 df2.col1 df2.col2 df2.col3 df19.col1 df19.col2 df19.col3
# 1   A        2        3        4        5        6        7         3         6         7
# 2   B        3        4        5        5        4        5         7         4         4
就是这样——它能用,但非常难看。
答案 1 :(得分:2)
要让各数据框的列名互不重复,可以用一个函数把列表中除合并变量以外的所有列名改成唯一的名称。
# Make the column names of a list of data frames unique across the list.
#
# Every column whose name is not in `byvar` is renamed to
# "<list element name>.<column name>"; columns named in `byvar` keep their
# names so they can still serve as merge keys.
#
# x      A list of data frames. Elements without a (non-empty) name are
#        returned with their column names unchanged.
# byvar  Character vector of key column names to leave untouched.
#
# Returns a list of the same length and names as `x`, holding the renamed
# data frames.
resetNames <- function(x, byvar = "Var") {
  prefixes <- names(x)
  if (is.null(prefixes)) {
    prefixes <- rep("", length(x))
  }
  rename_one <- function(df, prefix) {
    nms <- names(df)
    # Decide per column, by name: this keeps key columns intact whatever
    # their order, instead of recycling `byvar` positionally.
    needs_prefix <- !(nms %in% byvar) & nzchar(prefix)
    nms[needs_prefix] <- paste(prefix, nms[needs_prefix], sep = ".")
    setNames(df, nms)
  }
  Map(rename_one, x, prefixes)
}
# Give every data frame distinct column names first, then fold the pairwise
# merge over the whole list.
Reduce(f = merge.all, x = resetNames(dlist))
# Var list1.col1 list1.col2 list1.col3 list2.col1 list2.col2 list2.col4 list3.col1
#1 A 2 3 4 5 6 7 3
#2 B 3 4 5 5 4 4 7
# list3.col2 list3.col3 list4.col1 list4.col2 list4.col3
#1 6 7 3 6 7
#2 4 4 4 5 6
在列表中多加了一个数据框之后运行,也没有出现警告。另外,还可以使用 data.table:它的 merge 方法不会因为重复的列名而发出警告。
library(data.table)
# Convert each data frame to a data.table, then fold the pairwise merge
# over the converted list.
Reduce(f = merge.all, x = lapply(X = dlist, FUN = as.data.table))
另一种选择是在数据进入函数时检查名称,在那里更改它们,然后您可以避免警告。这并不完美,但在这里工作正常。
# Full outer join of `x` and `y` on the key column(s), renaming clashing
# columns of `y` beforehand.
#
# Any non-key column of `y` whose name already occurs among the non-key
# columns of `x` (ignoring a trailing ".x"/".y" suffix left by an earlier
# merge) gets "DUPE" appended before merging.
#
# x, y  Data frames that both contain the column(s) named in `by`.
# by    Name(s) of the key column(s); defaults to "Var".
#
# Returns the merged data frame (all rows of both inputs are kept).
merge.all <- function(x, y, by = "Var") {
  # Names already taken on the x side, with merge() suffixes stripped.
  taken <- sub("[.](x|y)$", "", setdiff(names(x), by))
  # Logical mask over y's own names: selecting by name avoids reusing
  # positions from x to index into y.
  clash <- !(names(y) %in% by) & names(y) %in% taken
  names(y)[clash] <- paste0(names(y)[clash], "DUPE")
  merge(x, y, all = TRUE, by = by)
}
# Fold merge.all over every data frame in dlist and list the column names
# of the result.
# NOTE(review): the result is stored in a variable called `rm`, which is
# also the name of base::rm().
rm <- Reduce(f = merge.all, x = dlist)
names(rm)
# [1] "Var" "col1" "col2" "col3" "col1DUPE.x"
# [6] "col2DUPE.x" "col4" "col1DUPE.y" "col2DUPE.y" "col3DUPE.x"
# [11] "col1DUPE" "col2DUPE" "col3DUPE.y"
其中 dlist 是:
# Example input: a named list of four two-row data frames (list1 .. list4).
# Each has a factor column `Var` with levels "A" and "B" plus three integer
# columns; list2 carries `col4` where the other three carry `col3`.
# The surrounding text identifies this value as `dlist`; the expression
# itself is not assigned to a name. The layout looks like dput() output.
structure(list(list1 = structure(list(Var = structure(1:2, .Label = c("A",
"B"), class = "factor"), col1 = 2:3, col2 = 3:4, col3 = 4:5), .Names = c("Var",
"col1", "col2", "col3"), class = "data.frame", row.names = c(NA,
-2L)), list2 = structure(list(Var = structure(1:2, .Label = c("A",
"B"), class = "factor"), col1 = c(5L, 5L), col2 = c(6L, 4L),
col4 = c(7L, 4L)), .Names = c("Var", "col1", "col2", "col4"
), class = "data.frame", row.names = c(NA, -2L)), list3 = structure(list(
Var = structure(1:2, .Label = c("A", "B"), class = "factor"),
col1 = c(3L, 7L), col2 = c(6L, 4L), col3 = c(7L, 4L)), .Names = c("Var",
"col1", "col2", "col3"), class = "data.frame", row.names = c(NA,
-2L)), list4 = structure(list(Var = structure(1:2, .Label = c("A",
"B"), class = "factor"), col1 = 3:4, col2 = c(6L, 5L), col3 = c(7L,
6L)), .Names = c("Var", "col1", "col2", "col3"), row.names = c(NA,
-2L), class = "data.frame")), .Names = c("list1", "list2", "list3",
"list4"))