Question

# Nested list indexed as mylist[[i]][[j]][[k]]: 3 outer elements (i), each
# holding 2 lists (j) of 3 leaves (k); every leaf is either a number or NULL.
mylist <- list(
  list(list(NULL, 2, 2), list(2, 3, NULL)),
  list(list(NULL, NULL, 2), list(NULL, 3, NULL)),
  list(list(2, 3, 2), list(NULL, 3, NULL))
)

> mylist
[[1]]
[[1]][[1]]
[[1]][[1]][[1]]
NULL

[[1]][[1]][[2]]
[1] 2

[[1]][[1]][[3]]
[1] 2


[[1]][[2]]
[[1]][[2]][[1]]
[1] 2

[[1]][[2]][[2]]
[1] 3

[[1]][[2]][[3]]
NULL



[[2]]
[[2]][[1]]
[[2]][[1]][[1]]
NULL

[[2]][[1]][[2]]
NULL

[[2]][[1]][[3]]
[1] 2


[[2]][[2]]
[[2]][[2]][[1]]
NULL

[[2]][[2]][[2]]
[1] 3

[[2]][[2]][[3]]
NULL



[[3]]
[[3]][[1]]
[[3]][[1]][[1]]
[1] 2

[[3]][[1]][[2]]
[1] 3

[[3]][[1]][[3]]
[1] 2


[[3]][[2]]
[[3]][[2]][[1]]
NULL

[[3]][[2]][[2]]
[1] 3

[[3]][[2]][[3]]
NULL

mylist是一个列表对象，其中包含i = 3个列表。在每个i列表中，有j = 2个列表。在每个j列表中，都有k = 3个元素。

可以使用i, j, k对对象编制索引。例如，mylist[[1]][[1]][[1]] = NULL。

我想知道在每个j层内的每个k层中NULL所占的比例。

Stratum j = 1

对于j = 1和k = 1，NULL的比例是2/3，因为[[1]][[1]][[1]]和[[2]][[1]][[1]]是NULL。
对于j = 1和k = 2，NULL的比例为1/3，因为[[2]][[1]][[2]]为NULL。
对于j = 1和k = 3，NULL的比例为0.

Stratum j = 2

对于j = 2和k = 1，NULL的比例是2/3，因为[[2]][[2]][[1]]和[[3]][[2]][[1]]是NULL。
对于j = 2和k = 2，NULL的比例为0
对于j = 2和k = 3，NULL的比例为1，因为[[1]][[2]][[3]]，[[2]][[2]][[3]]和[[3]][[2]][[3]]都是NULL。

换句话说，我应该得到6个比例作为输出。是否有一种高效的方法可以轻松访问这些列表元素而无需编写循环？我试过apply(mylist, 2, function(x){sum(is.null(x))/length(x)})，但这似乎不起作用。

Answer 1

purrr提供了一些很好的列表处理工具，可以让这类任务变得更简单。使用purrr 0.2.3：

library(purrr)

# Same data as in the question, indexed as mylist[[i]][[j]][[k]].
mylist = list(list(list(NULL, 2, 2), list(2, 3, NULL)), 
              list(list(NULL, NULL, 2), list(NULL, 3, NULL)), 
              list(list(2, 3, 2), list(NULL, 3, NULL)))

# Proportion of NULL leaves for every (j, k) pair, taken across the i elements.
mylist %>% 
    transpose() %>%    # turn the list inside-out: the outer level becomes j, the inner level i
    modify_depth(-1, is.null) %>%    # replace every leaf with TRUE if it is NULL, FALSE otherwise
    # for each j, pmap() walks the i lists in parallel, so each call receives the
    # k-th flag of every i; sum(...) counts the TRUEs and length(c(...)) is the number of i
    map(pmap, ~sum(...) / length(c(...))) %>% 
    str()    # print the nested result compactly
#> List of 2
#>  $ :List of 3
#>   ..$ : num 0.667
#>   ..$ : num 0.333
#>   ..$ : num 0
#>  $ :List of 3
#>   ..$ : num 0.667
#>   ..$ : num 0
#>   ..$ : num 1

话虽如此，如果您的数据实际上是矩形的（规整的），最好将NULL值转换为NA，这样就可以把它放入矩阵、data.frame或数组中，例如：

# NULL cannot be stored in an atomic vector, so each NULL leaf is swapped for
# NA before flattening; the NA share per row is then the NULL share per (j, k).
mylist %>% 
    modify_depth(-1, ~.x %||% NA_real_) %>% 
    unlist() %>%    # flatten to one vector: k varies fastest, then j, then i
    matrix(ncol = 3) %>%    # column-wise fill: one column per i, one row per (j, k) pair
    is.na() %>% 
    {rowSums(.) / ncol(.)}    # NA count in each row divided by the number of i columns
#> [1] 0.6666667 0.3333333 0.0000000 0.6666667 0.0000000 1.0000000

将它转换为整洁（tidy）的data.frame需要更多的工作，但一旦完成，后续操作就会变得非常简单：

library(tidyverse)

# Build a long ("tidy") data frame with one row per leaf and explicit i, j, k
# index columns; NULL leaves are stored as NA in `value`.
# NOTE(review): written against the 2017-era tidyverse; as_data_frame(),
# gather() and argument-less unnest() are deprecated or superseded in later
# releases -- confirm behaviour before running on a current install.
mydf <- mylist %>% 
    modify_depth(-1, ~.x %||% NA_real_) %>%    # change NULL to NA
    set_names(paste0('i', 1:3)) %>%    # add names to become column names
    modify_depth(2, flatten_dbl) %>%    # flatten k values to vector
    as_data_frame() %>% 
    mutate(j = row_number()) %>%    # keep j index (which row each element is in)
    unnest() %>%    # expand
    group_by(j) %>% mutate(k = row_number()) %>%    # add k index
    gather(i, value, i1:i3) %>%    # reshape to long form
    mutate(i = parse_number(i)) %>%    # turn the labels "i1".."i3" back into the numeric i index
    select(i, j, k, value) %>% ungroup()    # clean up

# Inspect the structure of the result.
mydf %>% str()
#> Classes 'tbl_df', 'tbl' and 'data.frame':    18 obs. of  4 variables:
#>  $ i    : num  1 1 1 1 1 1 2 2 2 2 ...
#>  $ j    : int  1 1 1 2 2 2 1 1 1 2 ...
#>  $ k    : int  1 2 3 1 2 3 1 2 3 1 ...
#>  $ value: num  NA 2 2 2 3 NA NA NA 2 NA ...

# Proportion of missing values (originally NULL) for each (j, k) pair.
# is.na() yields a logical vector, so its mean is the share of TRUE values;
# this replaces the earlier sum(is.na(value) / n()), whose division sat inside
# sum() and hid the intent.
mydf %>% 
    group_by(j, k) %>% 
    summarise(k_null = mean(is.na(value)))
#> # A tibble: 6 x 3
#> # Groups:   j [?]
#>       j     k    k_null
#>   <int> <int>     <dbl>
#> 1     1     1 0.6666667
#> 2     1     2 0.3333333
#> 3     1     3 0.0000000
#> 4     2     1 0.6666667
#> 5     2     2 0.0000000
#> 6     2     3 1.0000000

如果您愿意，可以使用数组：

# Replace NULL with NA, flatten, and reshape into a 3 x 2 x 3 array.
# unlist() emits values with k varying fastest, then j, then i, and array()
# fills its first dimension fastest, so the dimensions are (k, j, i).
myarray <- mylist %>% 
    modify_depth(-1, ~.x %||% NA_real_) %>% 
    unlist() %>% 
    array(c(3, 2, 3))

# Each printed slice ", , n" below is one i element: rows are k, columns are j.
myarray
#> , , 1
#> 
#>      [,1] [,2]
#> [1,]   NA    2
#> [2,]    2    3
#> [3,]    2   NA
#> 
#> , , 2
#> 
#>      [,1] [,2]
#> [1,]   NA   NA
#> [2,]   NA    3
#> [3,]    2   NA
#> 
#> , , 3
#> 
#>      [,1] [,2]
#> [1,]    2   NA
#> [2,]    3    3
#> [3,]    2   NA

# Margins 1:2 keep the k and j dimensions, so the function runs over the i
# values of each (k, j) cell; the result is a k-by-j matrix of NA proportions.
apply(is.na(myarray), 1:2, function(x){sum(x) / length(x)})
#>           [,1]      [,2]
#> [1,] 0.6666667 0.6666667
#> [2,] 0.3333333 0.0000000
#> [3,] 0.0000000 1.0000000

Answer 2

使用基础R：

# Proportion of NULL elements in `x`.
#
# x: a list (or atomic vector) whose elements are checked with is.null().
# Returns a single number between 0 and 1, or NaN when `x` is empty (0 / 0).
test <- function(x) {
  # vapply() always returns a logical vector, even for empty input; sapply()
  # would return an empty list there, which sum() cannot handle.
  null_flags <- vapply(x, is.null, logical(1))
  sum(null_flags) / length(x)
}

# For every j stratum, compute the proportion of NULL leaves at each k
# position across all i elements of `mylist`.
# Returns a list with one numeric vector per j, each holding one value per k.
# The j and k ranges are taken from the first i element, which assumes every
# i element has the same shape.
lapply(seq_along(mylist[[1]]), function(j_idx) {
  vapply(seq_along(mylist[[1]][[j_idx]]), function(k_idx) {
    # lapply() keeps NULL leaves as list entries, so test() can count them;
    # no reliance on sapply()'s input-dependent simplification.
    test(lapply(mylist, function(i_elem) i_elem[[j_idx]][[k_idx]]))
  }, numeric(1))
})


#[[1]]
#[1] 0.6666667 0.3333333 0.0000000

#[[2]]
#[1] 0.6666667 0.0000000 1.0000000

R：对列表

2 个答案: