如何展开(unnest)一个由数据框组成的列表列

时间:2019-04-19 16:04:40

标签: r dplyr tidyverse tidyr purrr

我正在尝试展开一个嵌套列,该列是一个由数据框组成的列表。列表中的每个元素要么是 NULL,要么是一个 1 行 n 列的数据框,因此目标只是把这 n 列添加到 tibble 中。(NULL 列表项最好展开为 NA。)

我尝试了几种解决方案,包括this answer的解决方案。

目标输出是一个扁平的 tibble,其中包含以下列: full_address、address、location.x、location.y、score、attributes.StreetName、attributes.Match_addr。

require(tidyverse)
#> Loading required package: tidyverse

# Example input: five addresses plus a `json` list-column. Each `json` entry is
# either NULL or a 1-row data frame whose `location` and `attributes` columns
# are themselves 1-row data frames.
df <- local({
  # Wrap a named list of length-one columns as a 1-row data.frame.
  one_row <- function(cols) {
    structure(cols, class = "data.frame", row.names = 1L)
  }

  # Assemble one non-NULL `json` entry; `score` keeps the type it is given
  # (the last entry uses an integer, the others doubles).
  match_row <- function(address, x, y, score, street_name, match_addr) {
    one_row(list(
      address = address,
      location = one_row(list(x = x, y = y)),
      score = score,
      attributes = one_row(list(
        StreetName = street_name,
        Match_addr = match_addr
      ))
    ))
  }

  structure(
    list(
      full_address = c(
        "2379 ADDISON BLVD, HIGH POINT, NC 27262",
        "1751 W LEXINGTON AVE, HIGH POINT, NC 27262",
        "2514 WILLARD DAIRY RD, HIGH POINT, NC 27265",
        "126 MARYWOOD DR, HIGH POINT, NC 27265",
        "508 EDNEY RIDGE RD, GREENSBORO, NC 27408"
      ),
      json = list(
        NULL,
        NULL,
        match_row(
          "2514 WILLARD DAIRY", -79.9766181813648, 36.0477204695356,
          92.8, "WILLARD DAIRY", "2514 WILLARD DAIRY"
        ),
        match_row(
          "126 MARYWOOD, HIGH POINT", -80.0202617159213, 36.0077059145502,
          97.24, "MARYWOOD", "126 MARYWOOD, HIGH POINT"
        ),
        match_row(
          "508 EDNEY RIDGE RD", -79.840872836677, 36.1105523384593,
          100L, "EDNEY RIDGE", "508 EDNEY RIDGE RD"
        )
      )
    ),
    class = c("tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -5L)
  )
})

df
#> # A tibble: 5 x 2
#>   full_address                                json                
#>   <chr>                                       <list>              
#> 1 2379 ADDISON BLVD, HIGH POINT, NC 27262     <NULL>              
#> 2 1751 W LEXINGTON AVE, HIGH POINT, NC 27262  <NULL>              
#> 3 2514 WILLARD DAIRY RD, HIGH POINT, NC 27265 <data.frame [1 × 4]>
#> 4 126 MARYWOOD DR, HIGH POINT, NC 27265       <data.frame [1 × 4]>
#> 5 508 EDNEY RIDGE RD, GREENSBORO, NC 27408    <data.frame [1 × 4]>


# Attempt 1: unnest the `json` list-column directly. The recorded run rejects
# it because the column is a list that contains data frames.
df %>% unnest(json)
#> Error: Argument 2 can't be a list containing data frames


# Attempt 2: unlist every column and rebuild a tibble. In the recorded run
# `json` unlists to 18 values while `full_address` has 5, so the columns
# cannot be combined.
df %>% map(unlist) %>% as_data_frame()
#> Warning: `as_data_frame()` is deprecated, use `as_tibble()` (but mind the new semantics).
#> This warning is displayed once per session.
#> Tibble columns must have consistent lengths, only values of length one are recycled:
#> * Length 5: Column `full_address`
#> * Length 18: Column `json`

# Attempt 3: apply simplify_all() to every list-column before unnesting. The
# recorded run ends with the same error as attempt 1.
df %>%
  mutate_if(is.list, simplify_all) %>%    # flatten each list element internally 
  unnest() 
#> Error: Argument 2 can't be a list containing data frames

由 reprex 包(v0.2.1)创建于 2019-04-19

1 个答案:

答案 0 :(得分:4)

问题之一是 json 列中的每个数据框,其列本身又是嵌套的 data.frame(例如 location 和 attributes)。

library(tidyverse)
# Turn every element of the `json` list-column into a flat one-row table, then
# unnest it next to `full_address`.
#  - NULL entries become a one-row tibble of NA placeholders; the columns it
#    does not name come out as NA once the rows are combined.
#  - Data frame entries are passed column-by-column to data.frame(), which
#    expands the nested `location` / `attributes` data frames into
#    `location.x`, `attributes.StreetName`, ... columns.
df %>%
  mutate(json = map(json, ~ if (is.null(.x)) {
    tibble(
      attributes.StreetName = NA_character_,
      attributes.Match_addr = NA_character_
    )
  } else {
    do.call(data.frame, c(.x, stringsAsFactors = FALSE))
  })) %>%
  # Name the column explicitly: calling unnest without a column relies on
  # implicit detection of list-columns, which newer tidyr releases deprecate.
  unnest(json)
# A tibble: 5 x 7
#  full_address                     attributes.StreetNa… attributes.Match_ad… address           location.x location.y score
#  <chr>                            <chr>                <chr>                <chr>                  <dbl>      <dbl> <dbl>
#1 2379 ADDISON BLVD, HIGH POINT, … <NA>                 <NA>                 <NA>                    NA         NA    NA  
#2 1751 W LEXINGTON AVE, HIGH POIN… <NA>                 <NA>                 <NA>                    NA         NA    NA  
#3 2514 WILLARD DAIRY RD, HIGH POI… WILLARD DAIRY        2514 WILLARD DAIRY   2514 WILLARD DAI…      -80.0       36.0  92.8
#4 126 MARYWOOD DR, HIGH POINT, NC… MARYWOOD             126 MARYWOOD, HIGH … 126 MARYWOOD, HI…      -80.0       36.0  97.2
#5 508 EDNEY RIDGE RD, GREENSBORO,… EDNEY RIDGE          508 EDNEY RIDGE RD   508 EDNEY RIDGE …      -79.8       36.1 100  

或使用map_if

# Flatten one non-NULL `json` entry into a plain one-level data frame.
#
# `dat` is a data frame whose columns may themselves be data frames (here
# `location` and `attributes`). The result keeps the ordinary columns first and
# then expands each nested data frame into `<outer>.<inner>` columns, e.g.
# `location.x` and `attributes.StreetName`.
#
# A bare `flatten` is avoided on purpose: with only the tidyverse attached it
# resolves to purrr::flatten, which returns a plain list and drops the outer
# column names instead of producing the prefixed columns needed by unnest.
f1 <- function(dat) {
  cols <- as.list(dat)
  is_nested <- vapply(cols, is.data.frame, logical(1))
  # data.frame() prefixes the columns of a multi-column data frame argument
  # with the argument's name, which yields the `location.x` style names.
  do.call(
    data.frame,
    c(cols[!is_nested], cols[is_nested], stringsAsFactors = FALSE)
  )
}

# Placeholder row for a `json` entry that is not a data frame: a one-row
# tibble with the two `attributes.*` columns set to missing character values.
# `dat` is accepted so the function can be used as a mapper, but it is not read.
f2 <- function(dat) {
  missing_chr <- NA_character_
  tibble(
    attributes.StreetName = missing_chr,
    attributes.Match_addr = missing_chr
  )
}

# Replace each `json` entry with a flat one-row table -- f1 for data frame
# entries, f2 for everything else (the NULL entries) -- then unnest the column.
df %>%
  mutate(json = map_if(json, is.data.frame, f1, .else = f2)) %>%
  # Name the column explicitly: calling unnest without a column relies on
  # implicit detection of list-columns, which newer tidyr releases deprecate.
  unnest(json)
# A tibble: 5 x 7
#  full_address                     attributes.StreetNa… attributes.Match_ad… address           score location.x location.y
#  <chr>                            <chr>                <chr>                <chr>             <dbl>      <dbl>      <dbl>
#1 2379 ADDISON BLVD, HIGH POINT, … <NA>                 <NA>                 <NA>               NA         NA         NA  
#2 1751 W LEXINGTON AVE, HIGH POIN… <NA>                 <NA>                 <NA>               NA         NA         NA  
#3 2514 WILLARD DAIRY RD, HIGH POI… WILLARD DAIRY        2514 WILLARD DAIRY   2514 WILLARD DAI…  92.8      -80.0       36.0
#4 126 MARYWOOD DR, HIGH POINT, NC… MARYWOOD             126 MARYWOOD, HIGH … 126 MARYWOOD, HI…  97.2      -80.0       36.0
#5 508 EDNEY RIDGE RD, GREENSBORO,… EDNEY RIDGE          508 EDNEY RIDGE RD   508 EDNEY RIDGE … 100        -79.8       36.1