将嵌套列表转换为矩阵

时间:2017-06-17 13:20:36

标签: json r

我从 Elastic 获取了经过过滤的 JSON 数据,它在 R 中被读成了一个多层嵌套的列表。我想在 RStudio 中把这个嵌套列表转换为二维结构(矩阵或数据框),以便进行数据分析。

我尝试过网上的多种方法(plyr、rbind、unlist、tidyjson),但都没有奏效。我迫切需要帮助。

结构示例:

> str(RAW2)
List of 10
 $ :List of 5
  ..$ _index: chr "xml-search-2017.06.12"
  ..$ _type : chr "xml"
  ..$ _id   : chr "76595087100_1"
  ..$ _score: num 13.5
  ..$ fields:List of 3
  .. ..$ check_in_date :List of 1
  .. .. ..$ : chr "2017-06-20T00:00:00.000Z"
  .. ..$ check_out_date:List of 1
  .. .. ..$ : chr "2017-06-23T00:00:00.000Z"
  .. ..$ ts_start      :List of 1
  .. .. ..$ : chr "2017-06-12T20:11:07.348Z"
 $ :List of 5
  ..$ _index: chr "xml-search-2017.06.12"
  ..$ _type : chr "xml"
  ..$ _id   : chr "76595087300_1"
  ..$ _score: num 13.5
  ..$ fields:List of 3
  .. ..$ check_in_date :List of 1
  .. .. ..$ : chr "2017-06-20T00:00:00.000Z"
  .. ..$ check_out_date:List of 1
  .. .. ..$ : chr "2017-06-23T00:00:00.000Z"
  .. ..$ ts_start      :List of 1
  .. .. ..$ : chr "2017-06-12T20:11:07.060Z"

谢谢

list(structure(list(`_index` = "xml-search-2017.06.12", `_type` = "xml", 
    `_id` = "76595087100_1", `_score` = 13.457847, fields = structure(list(
        check_in_date = list("2017-06-20T00:00:00.000Z"), check_out_date = list(
            "2017-06-23T00:00:00.000Z"), ts_start = list("2017-06-12T20:11:07.348Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76595087300_1", `_score` = 13.457847, 
    fields = structure(list(check_in_date = list("2017-06-20T00:00:00.000Z"), 
        check_out_date = list("2017-06-23T00:00:00.000Z"), ts_start = list(
            "2017-06-12T20:11:07.060Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76594844800_1", `_score` = 13.455816, 
    fields = structure(list(check_in_date = list("2017-06-20T00:00:00.000Z"), 
        check_out_date = list("2017-06-22T00:00:00.000Z"), ts_start = list(
            "2017-06-12T20:11:03.445Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76597264600_1", `_score` = 13.455816, 
    fields = structure(list(check_in_date = list("2017-06-13T00:00:00.000Z"), 
        check_out_date = list("2017-06-16T00:00:00.000Z"), ts_start = list(
            "2017-06-12T20:13:15.005Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76599762900_1", `_score` = 13.455723, 
    fields = structure(list(check_in_date = list("2017-06-22T00:00:00.000Z"), 
        check_out_date = list("2017-06-28T00:00:00.000Z"), ts_start = list(
            "2017-06-12T20:14:37.454Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76595493900_1", `_score` = 13.455723, 
    fields = structure(list(check_in_date = list("2017-06-20T00:00:00.000Z"), 
        check_out_date = list("2017-06-23T00:00:00.000Z"), ts_start = list(
            "2017-06-12T20:11:07.348Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76597065400_1", `_score` = 13.169026, 
    fields = structure(list(check_in_date = list("2017-06-13T00:00:00.000Z"), 
        check_out_date = list("2017-06-16T00:00:00.000Z"), ts_start = list(
            "2017-06-12T20:13:14.994Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76594777600_1", `_score` = 13.169026, 
    fields = structure(list(check_in_date = list("2017-06-20T00:00:00.000Z"), 
        check_out_date = list("2017-06-22T00:00:00.000Z"), ts_start = list(
            "2017-06-12T20:11:03.440Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76572404700_1", `_score` = 13.169026, 
    fields = structure(list(check_in_date = list("2017-06-13T00:00:00.000Z"), 
        check_out_date = list("2017-06-14T00:00:00.000Z"), ts_start = list(
            "2017-06-12T19:53:56.580Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76598481000_1", `_score` = 12.763965, 
    fields = structure(list(check_in_date = list("2017-06-22T00:00:00.000Z"), 
        check_out_date = list("2017-06-28T00:00:00.000Z"), ts_start = list(
            "2017-06-12T20:14:37.452Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")))

子集2:

list(structure(list(`_index` = "xml-search-2017.06.12", `_type` = "xml", 
    `_id` = "76452356700_1", `_score` = 2.390721, fields = structure(list(
        check_in_date = list("2017-06-28T00:00:00.000Z"), check_out_date = list(
            "2017-07-02T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:21.311Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452363400_3", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-20T00:00:00.000Z"), 
        check_out_date = list("2017-06-30T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:21.235Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452375900_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-09-01T00:00:00.000Z"), 
        check_out_date = list("2017-09-03T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:30.092Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452377300_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-22T00:00:00.000Z"), 
        check_out_date = list("2017-06-24T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:31.633Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452383100_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Rodeway Inn South Miami", 
        "East Miami", "Holiday Inn Port Of Miami", "Mayfair Hotel & Spa", 
        "Intercontinental Miami", "Marriott Biscayne Bay", "Eb Hotel Miami", 
        "Doubletree Grand Biscayne Bay", "Hotel Beaux Arts Miami", 
        "Cambria Suites Miami Airport", "Epic Miami, A Kimpton Hotel", 
        "The Ritz-Carlton Coconut Grove", "Quality Inn Miami Airport", 
        "Hilton Miami Downtown", "Conrad Miami", "Miccosukee Resort & Gaming", 
        "Courtyard Downtown", "Jw Marriott Marquis Miami", "Miami Marriott Dadeland", 
        "Courtyard Miami Coral Gables"), check_in_date = list(
        "2017-08-26T00:00:00.000Z"), check_out_date = list("2017-08-29T00:00:00.000Z"), 
        ts_start = list("2017-06-12T18:19:30.198Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452394200_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Rosen Plaza"), 
        check_in_date = list("2017-06-24T00:00:00.000Z"), check_out_date = list(
            "2017-06-27T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:31.672Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452409700_2", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Jw Marriott Hotel Mumbai Sahar"), 
        check_in_date = list("2018-03-12T00:00:00.000Z"), check_out_date = list(
            "2018-03-16T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:46.007Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452420500_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Holiday Inn Express Troisdorf"), 
        check_in_date = list("2017-06-25T00:00:00.000Z"), check_out_date = list(
            "2017-06-28T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:40.676Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452421700_2", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-23T00:00:00.000Z"), 
        check_out_date = list("2017-06-26T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:40.932Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452424300_2", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Aston Balikpapan"), 
        check_in_date = list("2017-07-05T00:00:00.000Z"), check_out_date = list(
            "2017-07-06T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:42.293Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452425100_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("H10 Duque De Loule"), 
        check_in_date = list("2017-07-28T00:00:00.000Z"), check_out_date = list(
            "2017-07-30T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:42.594Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452425500_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-25T00:00:00.000Z"), 
        check_out_date = list("2017-06-26T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:42.719Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452425600_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-17T00:00:00.000Z"), 
        check_out_date = list("2017-06-19T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:42.748Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452427300_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-17T00:00:00.000Z"), 
        check_out_date = list("2017-06-20T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:43.154Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452428100_10", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-09-11T00:00:00.000Z"), 
        check_out_date = list("2017-09-24T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:43.345Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452428800_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Ibis Istanbul City West"), 
        check_in_date = list("2017-06-30T00:00:00.000Z"), check_out_date = list(
            "2017-07-01T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:43.761Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452431500_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("The Rani Hotel And Spa"), 
        check_in_date = list("2017-07-07T00:00:00.000Z"), check_out_date = list(
            "2017-07-14T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:45.460Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452431700_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-17T00:00:00.000Z"), 
        check_out_date = list("2017-06-20T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:45.642Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452434500_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Cloitre St Louis", 
        "De L'Horloge", "Hotel D'Europe", "Danieli", "Bristol", 
        "Kyriad Palais Des Papes", "Regina"), check_in_date = list(
        "2017-06-24T00:00:00.000Z"), check_out_date = list("2017-06-25T00:00:00.000Z"), 
        ts_start = list("2017-06-12T18:19:47.037Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452434700_2", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-09-08T00:00:00.000Z"), 
        check_out_date = list("2017-09-20T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:47.086Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452447400_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-09-25T00:00:00.000Z"), 
        check_out_date = list("2017-09-30T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:51.056Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452451400_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-07-01T00:00:00.000Z"), 
        check_out_date = list("2017-07-04T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:40.306Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452452500_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-18T00:00:00.000Z"), 
        check_out_date = list("2017-06-19T00:00:00.000Z"), ts_start = list(
            "2017-06-12T18:19:52.461Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452452800_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Best Western Grand West'S Villas"), 
        check_in_date = list("2017-07-12T00:00:00.000Z"), check_out_date = list(
            "2017-07-14T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:40.753Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452453600_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Athens Status Suites", 
        "Ambrosia Hotel & Suites", "A For Athens", "Athenswas", 
        "Kimon Athens Hotel", "Chic", "Athinais", "Novus City Hotel"), 
        check_in_date = list("2017-08-12T00:00:00.000Z"), check_out_date = list(
            "2017-08-25T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:54.035Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")))

2 个答案:

答案 0 :(得分:3)

这个方法还不够简洁,但可以作为一种选择:

library(tidyverse)

# NOTE(review): `raw2` is presumably the nested list from the question
# (shown there as RAW2) -- confirm the object name before running.
# flatten() lifts the entries of `fields` up one level in each record,
# map_df() binds the records row-wise into a tibble, and unnest() expands
# the remaining list columns into plain columns.
# NOTE(review): newer tidyr releases appear to expect an explicit `cols`
# argument to unnest() -- verify against the installed version.
df <- unnest(map_df(raw2, flatten))

# Print the resulting tibble.
df
#> # A tibble: 10 x 7
#>                 `_index` `_type`         `_id` `_score`
#>                    <chr>   <chr>         <chr>    <dbl>
#>  1 xml-search-2017.06.12     xml 76595087100_1 13.45785
#>  2 xml-search-2017.06.12     xml 76595087300_1 13.45785
#>  3 xml-search-2017.06.12     xml 76594844800_1 13.45582
#>  4 xml-search-2017.06.12     xml 76597264600_1 13.45582
#>  5 xml-search-2017.06.12     xml 76599762900_1 13.45572
#>  6 xml-search-2017.06.12     xml 76595493900_1 13.45572
#>  7 xml-search-2017.06.12     xml 76597065400_1 13.16903
#>  8 xml-search-2017.06.12     xml 76594777600_1 13.16903
#>  9 xml-search-2017.06.12     xml 76572404700_1 13.16903
#> 10 xml-search-2017.06.12     xml 76598481000_1 12.76397
#> # ... with 3 more variables: check_in_date <chr>, check_out_date <chr>,
#> #   ts_start <chr>

答案 1 :(得分:1)

使用基础 R 的写法可以是:

# Flatten each record with unlist() and stack the results into one data frame.
# NOTE(review): `l1` is presumably the nested list from the question -- confirm.
# Wrapped in local() so the snippet stays a single expression that returns the
# data frame without leaving helper variables in the workspace.
local({
  # unlist() joins nested names with ".", e.g. "fields.check_in_date", and
  # coerces every value of a record to character.
  flat <- lapply(l1, unlist)
  # Records can carry different fields (e.g. an optional or multi-valued
  # hotel_name), so align every row on the union of all names; absent fields
  # become NA instead of making rbind() fail on mismatched columns.
  cols <- unique(unlist(lapply(flat, names)))
  rows <- lapply(flat, function(i) setNames(i[cols], cols))
  df <- as.data.frame(do.call(rbind, rows), stringsAsFactors = FALSE)
  # Restore numeric columns such as `_score` that unlist() turned into text;
  # as.is = TRUE keeps the remaining columns as character rather than factor.
  df[] <- lapply(df, type.convert, as.is = TRUE)
  df
})