在R中读取包含转义字符的CSV文件

时间:2017-12-13 22:14:01

标签: r csv

我有一个CSV文件,其中用反斜杠对某些逗号进行了转义,如下所示:

c389c3d0-1175-465f-b2bb-1070f24d17a6,eccbc87e4b5ce2fe28308fd9f2a7baf3,{"parsed_query":{"qty":"2"\,"unit":null\,"brand":null\,"food":"apples"}\,"accuracy":"yellow"},apples,NULL,NULL,g,189.28,0.62,0.10,0.00,3.64,50.27,8.74,37.82,0.95,389.48,40.04,2015-10-19 21:24:49,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,0,NULL,NULL,NULL,NULL
5df13606-1f78-491f-a4b2-d104f82d030f,c81e728d9d4c2f636f067f89cc14862c,{"parsed_query":{"qty":"2"\,"unit":null\,"brand":null\,"food":"oranges"}\,"accuracy":"yellow"},oranges,NULL,NULL,g,137.20,0.42,0.05,0.00,2.80,35.11,6.16,23.80,2.55,464.80,64.40,2015-10-19 21:24:49,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,0,NULL,NULL,NULL,NULL

我试图将文件读入R,但我收到以下错误:

  

Error in read.table(file = file, header = header, sep = sep, quote = quote, : more columns than column names(列数多于列名)

有谁知道如何让R读取反斜杠作为转义字符?提前谢谢!

1 个答案:

答案 0 :(得分:1)

如果您不想改动那个被人错误地塞在CSV记录中间的JSON blob,也可以采用下面的做法。它有点笨重,但可以绕开这个棘手的、位于记录中间的JSON blob,而不会改动它的内容:

library(stringi)
library(tidyverse)

# Read a CSV whose records contain an unquoted JSON blob with backslash-escaped
# commas. Instead of interpreting the escapes, each line is cut into three
# parts -- the plain fields before the blob, the blob itself (kept verbatim),
# and the plain fields after it -- and only the plain parts are split on ",".
stri_read_lines("~/Desktop/a.csv") %>%
  map_df(function(line) {

    # Leading run of plain field characters up to the "{" that opens the blob;
    # the captured text ends with the comma that precedes the blob.
    m1 <- stri_match_first_regex(line, "^([[:alnum:]-\\:, \\.]+)\\{")[,2]
    # Trailing run of plain field characters after the "}" that closes the
    # blob; the captured text starts with the comma that follows the blob.
    m2 <- stri_match_last_regex(line, "\\}([[:alnum:]-\\:, \\.]+)")[,2]

    # m1 ends with "," so its split has an empty LAST piece: drop that one
    # (dropping the first piece would throw away the record's first field).
    before <- head(stri_split_fixed(m1, ",")[[1]], -1)
    # m2 starts with "," so its split has an empty FIRST piece: drop that one.
    after <- stri_split_fixed(m2, ",")[[1]][-1]
    # What remains once the prefix and suffix are removed is the JSON blob,
    # with its "\," escapes left untouched.
    blob <- stri_replace_first_fixed(line, m1, "") %>%
      stri_replace_last_fixed(m2, "")

    fields <- c(before, blob, after)
    # Name the fields X1..Xn so the per-line lists row-bind into one data frame.
    setNames(as.list(fields), sprintf("X%d", seq_along(fields)))

  }) %>%
  glimpse()
## Observations: 2
## Variables: 32
## $ X1  <chr> "c389c3d0-1175-465f-b2bb-1070f24d17a6", "5df13606-1f78-491f-a4b2-d104f82d030f"
## $ X2  <chr> "eccbc87e4b5ce2fe28308fd9f2a7baf3", "c81e728d9d4c2f636f067f89cc14862c"
## $ X3  <chr> "{\"parsed_query\":{\"qty\":\"2\"\\,\"unit\":null\\,\"brand\":null\\,\"food\":\"apples\"}\\,\"accuracy\":\"yellow\"}", "{\"parsed_query\":{\...
## $ X4  <chr> "apples", "oranges"
## $ X5  <chr> "NULL", "NULL"
## $ X6  <chr> "NULL", "NULL"
## $ X7  <chr> "g", "g"
## $ X8  <chr> "189.28", "137.20"
## $ X9  <chr> "0.62", "0.42"
## $ X10 <chr> "0.10", "0.05"
## $ X11 <chr> "0.00", "0.00"
## $ X12 <chr> "3.64", "2.80"
## $ X13 <chr> "50.27", "35.11"
## $ X14 <chr> "8.74", "6.16"
## $ X15 <chr> "37.82", "23.80"
## $ X16 <chr> "0.95", "2.55"
## $ X17 <chr> "389.48", "464.80"
## $ X18 <chr> "40.04", "64.40"
## $ X19 <chr> "2015-10-19 21:24:49", "2015-10-19 21:24:49"
## $ X20 <chr> "NULL", "NULL"
## $ X21 <chr> "NULL", "NULL"
## $ X22 <chr> "NULL", "NULL"
## $ X23 <chr> "NULL", "NULL"
## $ X24 <chr> "NULL", "NULL"
## $ X25 <chr> "NULL", "NULL"
## $ X26 <chr> "NULL", "NULL"
## $ X27 <chr> "NULL", "NULL"
## $ X28 <chr> "0", "0"
## $ X29 <chr> "NULL", "NULL"
## $ X30 <chr> "NULL", "NULL"
## $ X31 <chr> "NULL", "NULL"
## $ X32 <chr> "NULL", "NULL"