这些是我的数据框:
# data
set.seed(1234321)
# Original data frame (i.e. a questionnaire survey data)
answer <- c("Yes", "No")
likert_scale <- c("strongly disagree", "disagree", "undecided", "agree", "strongly agree")
d1 <- c(rnorm(10)*10)
d2 <- sample(x = c(letters), size = 10, replace = TRUE)
d3 <- sample(x = likert_scale, size = 10, replace = TRUE)
d4 <- sample(x = likert_scale, size = 10, replace = TRUE)
d5 <- sample(x = likert_scale, size = 10, replace = TRUE)
d6 <- sample(x = answer, size = 10, replace = TRUE)
d7 <- sample(x = answer, size = 10, replace = TRUE)
original_df <- data.frame(d1, d2, d3, d4, d5, d6, d7)
# Questionnaire codebook data frame
quest_section <- c("generic", "likert scale", "specific approval")
starting_column <- c(1, 3, 6)
ending_column <- c(2, 5, 7)
df_codebook <- data.frame(quest_section, min_column, max_column)
我想基于quest_section
中的df_codebook
变量将原始数据帧拆分为不同的数据帧,使用starting_column
和ending_column
作为指标来选择其中的列original_df
。
这就是我尝试创建函数以拆分original_df
的原因:
# splitting dataframe function
split_df <- function(my_df, my_codebook) {
df_names <- df_codebook[,1] %>%
map(set_names)
for (i in 1:length(df_codebook[,1])) {
df_names$`[i]` <- original_df %>%
dplyr::select(df_codebook[[2]][i]:df_codebook[[3]][i])
}
}
# apply function to two dataframes
my_df_list <- split_df(my_df = original_df, my_codebook = df_codebook)
,结果是一个NULL
对象,而不是以下列表:
> my_df_list
$generic
d1 d2
1 12.369081 z
2 15.616230 x
3 18.396185 f
4 3.173245 q
5 10.715115 j
6 -11.459955 p
7 2.488894 j
8 1.158625 n
9 26.200816 a
10 12.624048 b
$`likert scale`
d3 d4 d5
1 disagree strongly agree strongly agree
2 undecided undecided strongly disagree
3 strongly agree undecided strongly disagree
4 agree undecided undecided
5 strongly disagree agree undecided
6 disagree strongly disagree undecided
7 disagree agree disagree
8 disagree strongly disagree undecided
9 undecided strongly disagree disagree
10 strongly disagree disagree strongly agree
$`specific approval`
d6 d7
1 No No
2 No No
3 Yes No
4 Yes Yes
5 Yes Yes
6 Yes Yes
7 Yes No
8 No Yes
9 No No
10 No Yes
我对任何一种解决方案都感兴趣:使用tidyverse
和purrr
方法或功能性方法。
答案 0 :(得分:1)
您可以使用Map
在starting_column
:ending_column
的每个之间创建一个序列,并使用该序列从original_df
中提取相关列。我们可以使用setNames
为列表分配名称。
setNames(Map(function(x, y) original_df[, x:y],
df_codebook$starting_column, df_codebook$ending_column),
df_codebook$quest_section)
这将返回
#$generic
# d1 d2
#1 12.369081 z
#2 15.616230 x
#3 18.396185 f
#4 3.173245 q
#5 10.715115 j
#6 -11.459955 p
#7 2.488894 j
#8 1.158625 n
#9 26.200816 a
#10 12.624048 b
#$`likert scale`
# d3 d4 d5
#1 disagree strongly agree strongly agree
#2 undecided undecided strongly disagree
#3 strongly agree undecided strongly disagree
#4 agree undecided undecided
#5 strongly disagree agree undecided
#6 disagree strongly disagree undecided
#7 disagree agree disagree
#8 disagree strongly disagree undecided
#9 undecided strongly disagree disagree
#10 strongly disagree disagree strongly agree
#$`specific approval`
# d6 d7
#1 No No
#2 No No
#3 Yes No
#4 Yes Yes
#5 Yes Yes
#6 Yes Yes
#7 Yes No
#8 No Yes
#9 No No
#10 No Yes
答案 1 :(得分:1)
尝试这种tidyverse
方法:
library(tidyverse)
#Data
# data
set.seed(1234321)
# Original data frame (i.e. a questionnaire survey data)
answer <- c("Yes", "No")
likert_scale <- c("strongly disagree", "disagree", "undecided", "agree", "strongly agree")
d1 <- c(rnorm(10)*10)
d2 <- sample(x = c(letters), size = 10, replace = TRUE)
d3 <- sample(x = likert_scale, size = 10, replace = TRUE)
d4 <- sample(x = likert_scale, size = 10, replace = TRUE)
d5 <- sample(x = likert_scale, size = 10, replace = TRUE)
d6 <- sample(x = answer, size = 10, replace = TRUE)
d7 <- sample(x = answer, size = 10, replace = TRUE)
original_df <- data.frame(d1, d2, d3, d4, d5, d6, d7)
# Questionnaire codebook data frame
quest_section <- c("generic", "likert scale", "specific approval")
starting_column <- c(1, 3, 6)
ending_column <- c(2, 5, 7)
df_codebook <- data.frame(quest_section, starting_column, ending_column)
您可以重塑数据,根据起点和终点位置进行合并,然后重塑为宽幅:
#Code for data
Data <- original_df %>%
mutate(id=row_number()) %>%
mutate(across(-id,~as.character(.)))%>%
pivot_longer(-id) %>%
arrange(name) %>%
mutate(Key=as.numeric(gsub('d','',name))) %>%
left_join(
df_codebook %>% pivot_longer(-quest_section) %>% rename(Key=value) %>% select(-name)
) %>% fill(quest_section)
#Split
List <- split(Data,Data$quest_section)
#Function to re process
myfun <- function(x)
{
y <- x %>% select(-c(quest_section,Key)) %>%
pivot_wider(names_from = name,values_from=value) %>% select(-id)
if(any(names(y)=='d1')) {y$d1 <- as.numeric(y$d1)}
return(y)
}
#Apply
List2 <- map(List, myfun)
输出:
List2
$generic
# A tibble: 10 x 2
d1 d2
<dbl> <chr>
1 12.4 z
2 15.6 x
3 18.4 f
4 3.17 q
5 10.7 j
6 -11.5 p
7 2.49 j
8 1.16 n
9 26.2 a
10 12.6 b
$`likert scale`
# A tibble: 10 x 3
d3 d4 d5
<chr> <chr> <chr>
1 disagree strongly agree strongly agree
2 undecided undecided strongly disagree
3 strongly agree undecided strongly disagree
4 agree undecided undecided
5 strongly disagree agree undecided
6 disagree strongly disagree undecided
7 disagree agree disagree
8 disagree strongly disagree undecided
9 undecided strongly disagree disagree
10 strongly disagree disagree strongly agree
$`specific approval`
# A tibble: 10 x 2
d6 d7
<chr> <chr>
1 No No
2 No No
3 Yes No
4 Yes Yes
5 Yes Yes
6 Yes Yes
7 Yes No
8 No Yes
9 No No
10 No Yes