使用R从多个excel文件中提取某些数据

时间:2017-10-19 23:31:04

标签: r excel

我将我的数据从多个Excel文件导入R,我的数据在R中看起来像这样(每天可能有100多个文件):

> data
[[1]]
   ST Code Emp          Employee              Pay.Code Hours   Gross
1  AL 7229  65                 S                HOURLY  0.00    0.00
2  AL 7229  65                 S                SALARY  0.00 3060.00
3  AL 7229  65                 S              PER DIEM  0.00  765.00
4  AL 7229  65                 S EXPENSE REIMBURSEMENT  0.00   11.00
5  CA   42   2                 R                HOURLY 60.00  720.00
6  CA   42   2                 R              OVERTIME  3.25   58.50
7  CA   42   3                 A                HOURLY 80.00  800.00
8  CA   42   3                 A              OVERTIME  6.25   93.75
9  CA   42   4                 N                HOURLY 79.25  990.63
10 CA   42   4                 N              OVERTIME  7.00  131.25
11 CA   42   9                 P                HOURLY 32.00  352.00
12 CA   42   9                 P              OVERTIME  1.75   28.88
13 CA   42  10                 E                HOURLY 72.00  864.00
14 CA   42  10                 E              OVERTIME  5.00   90.00

[[2]]
   ST Code Employee Pay.Code    Gross
1 AL  7229       NA       NA  23954.0
2 AL  8380       NA       NA  11092.1
3  GA 7380       NA       NA  98142.0
4  GA 8380       NA       NA  11984.0
5  NC 7380       NA       NA 218129.0
6  NC 8380       NA       NA  27891.0
7  TN 7380       NA       NA  28441.0
8  TN 8380       NA       NA   8348.0

现在我想把 Code = 7229 的数据提取为单个数据集,并导出到一个新的 Excel 文件,如下所示:

  > data

   ST Code Emp          Employee              Pay.Code Hours   Gross
1  AL 7229  65                 S                HOURLY  0.00    0.00
2  AL 7229  65                 S                SALARY  0.00 3060.00
3  AL 7229  65                 S              PER DIEM  0.00  765.00
4  AL 7229  65                 S EXPENSE REIMBURSEMENT  0.00   11.00
5  AL 7229  NA                                           NA  23954.0
6 AL  8380  NA                                           NA  11092.1

有没有更好的方法来做到这一点?

2 个答案:

答案 0 :(得分:1)

尝试

# Stack every imported sheet into a single data frame. A plain
# do.call("rbind", data) stops with an error when the data frames do not all
# have the same columns (the second sheet above has no Emp or Hours), so any
# column a sheet lacks is first added as NA and the columns are put in a
# common order before binding.
all_cols <- unique(unlist(lapply(data, names)))
df <- do.call("rbind", lapply(data, function(sheet) {
  for (col in setdiff(all_cols, names(sheet))) {
    sheet[[col]] <- rep(NA, nrow(sheet))
  }
  sheet[all_cols]
}))

然后,您可以将所有数据放在一个可以过滤的数据框中:

# Keep only the rows whose Code equals 7229 (rows with a missing Code are
# dropped, because %in% never yields NA).
df[df$Code %in% 7229, ]

答案 1 :(得分:1)

这应该可以解决问题:

library(tidyverse)

# Filter each data frame to Code 7229, row-bind the results (columns missing
# from a data frame are filled with NA), and write the combined rows to CSV.
# The output file is passed positionally: readr 1.4.0 renamed the `path`
# argument to `file`, and `path =` now triggers a deprecation warning.
df_list %>%
  map_dfr(filter, Code == 7229) %>%
  write_csv("/INSERT/PATH/HERE/text.csv")

以下是具有可重现示例的代码:

# First example sheet: per-employee pay lines (14 rows, 7 columns), built
# column by column instead of row by row.
df_1 <- tibble(
  ST = rep(c("AL", "CA"), times = c(4, 10)),
  Code = rep(c(7229, 42), times = c(4, 10)),
  Emp = c(rep(65, 4), rep(c(2, 3, 4, 9, 10), each = 2)),
  Employee = c(rep("S", 4), rep(c("R", "A", "N", "P", "E"), each = 2)),
  Pay.Code = c(
    "HOURLY", "SALARY", "PER DIEM", "EXPENSE REIMBURSEMENT",
    rep(c("HOURLY", "OVERTIME"), times = 5)
  ),
  Hours = c(
    0.00, 0.00, 0.00, 0.00,
    60.00, 3.25, 80.00, 6.25, 79.25, 7.00, 32.00, 1.75, 72.00, 5.00
  ),
  Gross = c(
    0.00, 3060.00, 765.00, 11.00,
    720.00, 58.50, 800.00, 93.75, 990.63, 131.25, 352.00, 28.88, 864.00, 90.00
  )
)

# Second example sheet: one Gross figure per ST/Code pair (8 rows). It has no
# Emp or Hours columns, and Employee / Pay.Code are entirely NA.
df_2 <- tibble(
  ST = rep(c("AL", "GA", "NC", "TN"), each = 2),
  Code = c(7229, 8380, 7380, 8380, 7380, 8380, 7380, 8380),
  Employee = NA,
  Pay.Code = NA,
  Gross = c(
    23954.0, 11092.1, 98142.0, 11984.0,
    218129.0, 27891.0, 28441.0, 8348.0
  )
)

# Collect the two example sheets in a list, mimicking the list of imported
# Excel files from the question.
df_list <- list(df_1, df_2)

# Filter each data frame to Code 7229, row-bind the results (columns missing
# from a data frame are filled with NA), and write the combined rows to CSV.
# The output file is passed positionally: readr 1.4.0 renamed the `path`
# argument to `file`, and `path =` now triggers a deprecation warning.
df_list %>%
  map_dfr(filter, Code == 7229) %>%
  write_csv("/INSERT/PATH/HERE/text.csv")

给出了:

# A tibble: 5 x 7
     ST  Code   Emp Employee              Pay.Code Hours Gross
  <chr> <dbl> <dbl>    <chr>                 <chr> <dbl> <dbl>
1    AL  7229    65        S                HOURLY     0     0
2    AL  7229    65        S                SALARY     0  3060
3    AL  7229    65        S              PER DIEM     0   765
4    AL  7229    65        S EXPENSE REIMBURSEMENT     0    11
5    AL  7229    NA     <NA>                  <NA>    NA 23954