将DAT文件导入R时各行列数不一致

时间:2013-10-17 20:23:12

标签: r

我有一个想要读入R的DAT文件,但是当我导入数据时,R总是显示只有10列/变量(根据第一行判断),而实际上应该有29列/变量。我该如何解决这个问题?

记事本上的DAT文件示例:

smsa66 smsa76 nearc2 nearc4 nearc4a nearc4b ed76 ed66 age76 daded
nodaded momed nomomed momdad14 sinmom14 step14 south66 south76
lwage76 famed black wage76 enroll76 kww iqscore mar76 libcrd14
exp76 exp762

        1          1          0          0          0          0          7
        5         29       9.94          1      10.25          1          1
        0          0          0          0   6.306275          9          1
      548          0         15          .          1          0         16
      256
        1          1          0          0          0          0         12
       11         27          8          0          8          0          1
        0          0          0          0   6.175867          8          0
      481          0         35         93          1          1          9
       81
        1          1          0          0          0          0         12
       12         34         14          0         12          0          1
        0          0          0          0   6.580639          2          0
      721          0         42        103          1          1         16
      256
        1          1          1          1          1          0         11
       11         27         11          0         12          0          1
        0          0          0          0   5.521461          6          0
      250          0         25         88          1          1         10
      100
        1          1          1          1          1          0         12
       12         34          8          0          7          0          1
        0          0          0          0   6.591674          8          0
      729          0         34        108          1          0         16
      256
        1          1          1          1          1          0         12
       11         26          9          0         12          0          1
        0          0          0          0   6.214608          6          0
      500          0         38         85          1          1          8
       64
        1          1          1          1          1          0         18
       16         33         14          0         14          0          1
        0          0          0          0   6.336826          1          0
      565          0         41        119          1          1          9
       81
        1          1          1          1          1          0         14
       13         29         14          0         14          0          1
        0          0          0          0   6.410175          1          0
      608          0         46        108          1          1          9
       81

1 个答案:

答案 0 :(得分:0)

# Header section of the DAT file: the variable names, wrapped over four
# physical lines. Kept as one string so it can be parsed with
# scan(text = txt1, what = ""), which yields 29 names.
txt1<-"    smsa66 smsa76 nearc2 nearc4 nearc4a nearc4b ed76 ed66 age76 daded
    nodaded momed nomomed momdad14 sinmom14 step14 south66 south76
    lwage76 famed black wage76 enroll76 kww iqscore mar76 libcrd14
    exp76 exp762"

# Data section of the DAT file: 8 observations of 29 values each. Every
# observation is wrapped over five physical lines (7 + 7 + 7 + 7 + 1 values),
# so the text cannot be read line-by-line as one row per record.
# NOTE: the "." missing-value marker in the first record of the question's
# sample has been replaced by NA here.
txt2 <-    
"            1          1          0          0          0          0          7
            5         29       9.94          1      10.25          1          1
            0          0          0          0   6.306275          9          1
          548          0         15          NA          1          0         16
          256
            1          1          0          0          0          0         12
           11         27          8          0          8          0          1
            0          0          0          0   6.175867          8          0
          481          0         35         93          1          1          9
           81
            1          1          0          0          0          0         12
           12         34         14          0         12          0          1
            0          0          0          0   6.580639          2          0
          721          0         42        103          1          1         16
          256
            1          1          1          1          1          0         11
           11         27         11          0         12          0          1
            0          0          0          0   5.521461          6          0
          250          0         25         88          1          1         10
          100
            1          1          1          1          1          0         12
           12         34          8          0          7          0          1
            0          0          0          0   6.591674          8          0
          729          0         34        108          1          0         16
          256
            1          1          1          1          1          0         12
           11         26          9          0         12          0          1
            0          0          0          0   6.214608          6          0
          500          0         38         85          1          1          8
           64
            1          1          1          1          1          0         18
           16         33         14          0         14          0          1
            0          0          0          0   6.336826          1          0
          565          0         41        119          1          1          9
           81
            1          1          1          1          1          0         14
           13         29         14          0         14          0          1
            0          0          0          0   6.410175          1          0
          608          0         46        108          1          1          9
           81"

现在运行以下代码(注意:txt2 中第一条记录的缺失值标记 "." 已被手动替换为 NA):

# Read every whitespace-separated value of the data text into one numeric
# vector. `what = numeric()` makes scan() return doubles directly (the string
# "numeric" is a character template, so scan() would return character data
# that needs a later as.numeric()). `na.strings` also accepts ".", the
# missing-value marker used in the original DAT file, so the data does not
# have to be edited by hand.
inp <- scan(text = txt2, what = numeric(), na.strings = c("NA", "."))
# Fail loudly if the values do not form whole 29-variable records; matrix()
# would otherwise only warn and recycle values.
stopifnot(length(inp) %% 29L == 0L)
# Records are wrapped over several physical lines, so rebuild them by filling
# a 29-column matrix row by row.
inmat <- matrix(inp, ncol = 29L, byrow = TRUE)
dfrm <- as.data.frame(inmat)
# Parse the header text to confirm that it yields the 29 variable names.
scan(text = txt1, what = "")
Read 29 items
 [1] "smsa66"   "smsa76"   "nearc2"   "nearc4"   "nearc4a"  "nearc4b"  "ed76"    
 [8] "ed66"     "age76"    "daded"    "nodaded"  "momed"    "nomomed"  "momdad14"
[15] "sinmom14" "step14"   "south66"  "south76"  "lwage76"  "famed"    "black"   
[22] "wage76"   "enroll76" "kww"      "iqscore"  "mar76"    "libcrd14" "exp76"   
[29] "exp762"  
# Label the columns with the variable names parsed from the header text.
colnames(dfrm) <- scan(text = txt1, what = character())
# scan() reports: Read 29 items
# Print the labelled data frame.
dfrm
#-----------------------
  smsa66 smsa76 nearc2 nearc4 nearc4a nearc4b ed76 ed66 age76 daded nodaded momed nomomed
1      1      1      0      0       0       0    7    5    29  9.94       1 10.25       1
2      1      1      0      0       0       0   12   11    27     8       0     8       0
3      1      1      0      0       0       0   12   12  
snipped remainder of output

最终结果:

# Inspect the result: expect 8 observations of 29 numeric variables.
str(dfrm)
'data.frame':   8 obs. of  29 variables:
 $ smsa66  : num  1 1 1 1 1 1 1 1
 $ smsa76  : num  1 1 1 1 1 1 1 1
 $ nearc2  : num  0 0 0 1 1 1 1 1
 $ nearc4  : num  0 0 0 1 1 1 1 1
 $ nearc4a : num  0 0 0 1 1 1 1 1
 $ nearc4b : num  0 0 0 0 0 0 0 0
 $ ed76    : num  7 12 12 11 12 12 18 14
 $ ed66    : num  5 11 12 11 12 11 16 13
 $ age76   : num  29 27 34 27 34 26 33 29
 $ daded   : num  9.94 8 14 11 8 9 14 14
 $ nodaded : num  1 0 0 0 0 0 0 0
 $ momed   : num  10.2 8 12 12 7 ...
 $ nomomed : num  1 0 0 0 0 0 0 0
 $ momdad14: num  1 1 1 1 1 1 1 1
 $ sinmom14: num  0 0 0 0 0 0 0 0
 $ step14  : num  0 0 0 0 0 0 0 0
 $ south66 : num  0 0 0 0 0 0 0 0
 $ south76 : num  0 0 0 0 0 0 0 0
 $ lwage76 : num  6.31 6.18 6.58 5.52 6.59 ...
 $ famed   : num  9 8 2 6 8 6 1 1
 $ black   : num  1 0 0 0 0 0 0 0
 $ wage76  : num  548 481 721 250 729 500 565 608
 $ enroll76: num  0 0 0 0 0 0 0 0
 $ kww     : num  15 35 42 25 34 38 41 46
 $ iqscore : num  NA 93 103 88 108 85 119 108
 $ mar76   : num  1 1 1 1 1 1 1 1
 $ libcrd14: num  0 1 1 1 0 1 1 1
 $ exp76   : num  16 9 16 10 16 8 9 9
 $ exp762  : num  256 81 256 100 256 64 81 81