将特定值转换为R中的列

时间:2018-10-24 07:57:16

标签: r

我有一个数据表,如下所示:

ID    time   somevalues  change
001   12:33  13          NA  
002   12:34  27          speed: 34   
003   12:35  45          width: 127
004   12:36  41          NA   
005   12:37  44          height: 19.2
006   12:35  45          NA
007   12:36  49          speed: 35   
008   12:37  44          speed: 27
009   12:38  45          NA
010   12:39  44          NA   
011   12:40  44          height: 18, speed: 28
012   12:41  40          NA
013   12:42  44          height: 18.1
014   12:43  55          width: 128.1
015   12:44  41          NA  
...   ...    ...         ...

该表包含传感器的各种测量值。有些测量仅在更改后才输入。此外,这些测量值始终输入同一列中。我需要的是一个数据表,看起来像这样:

ID    time   somevalues  speed  height  width
001   12:33  13          34     19.1    128
002   12:34  27          34     19.1    128
003   12:35  45          34     19.1    127
004   12:36  41          34     19.1    127
005   12:37  44          34     19.2    127
006   12:35  45          34     19.2    127
007   12:36  49          35     19.2    127
008   12:37  44          27     19.2    127
009   12:38  45          27     19.2    127
010   12:39  44          27     19.2    127
011   12:40  44          28     18      127
012   12:41  40          28     18      127
013   12:42  44          28     18.1    127
014   12:43  55          28     18.1    128.1
015   12:44  41          28     18.1    128.1
...   ...    ...         ...    ...     ...

我需要这种格式的数据才能对其进行分析和可视化。 有没有一种方法可以在R中不使用多个if语句呢?

1 个答案:

答案 0 :(得分:0)

这对您有用吗?

library(dplyr)

# Build the example data. The spaces inside `change` were removed so that
# read.table() can split the columns on whitespace; the logic is unaffected.
# Note: ID is parsed as a number, so its leading zeros are dropped.
data_temp <- read.table(text = "
ID    time   somevalues  change
001   12:33  13          NA  
002   12:34  27          speed:34   
003   12:35  45          width:127
004   12:36  41          NA   
005   12:37  44          height:19.2
006   12:35  45          NA
007   12:36  49          speed:35   
008   12:37  44          speed:27
009   12:38  45          NA
010   12:39  44          NA   
011   12:40  44          height:18,speed:28
012   12:41  40          speed:29,width:120.1
013   12:42  44          height:18.1,speed:30,with:50
014   12:43  55          width:128.1
015   12:44  41          NA", header = TRUE, stringsAsFactors = FALSE)

# Carry the most recent update forward over `n` rows.
#
#   values    - one value per update, in row order
#   positions - strictly increasing row indices at which the updates occur
#               (same length as `values`)
#   n         - total number of rows to fill
#
# Returns a vector of length `n`. Rows before the first update are NA, and an
# empty `positions` yields all NA instead of an error.
carry_forward <- function(values, positions, n) {
  # Number of updates seen up to and including each row; 0 means "none yet".
  last_update <- cumsum(seq_len(n) %in% positions)
  # Indexing with NA yields NA, which marks the rows before the first update.
  values[replace(last_update, last_update == 0L, NA)]
}

# Columns that are copied over unchanged.
data_wanted <- data_temp[c("ID", "time", "somevalues")]

# Rows whose `change` entry contains a speed update.
speed <- grep("speed:", data_temp$change)
# Strip everything up to and including "speed:", then everything from the
# next comma on, leaving only the speed value as text.
speed_string <- sub(",.*", "", sub(".*speed:", "", data_temp$change[speed]))

# Each row gets the value of the latest speed update at or before it; rows
# before the first update stay NA because the speed is not known yet.
# The same three steps work for the other keys (e.g. "height:", "width:").
data_wanted$speed <- carry_forward(
  as.numeric(speed_string), speed, nrow(data_wanted)
)


data_wanted
cbind(data_wanted, data_temp$change)

# ID  time somevalues speed             data_temp$change
# 1   1 12:33         13    NA                         <NA>
# 2   2 12:34         27    34                     speed:34
# 3   3 12:35         45    34                    width:127
# 4   4 12:36         41    34                         <NA>
# 5   5 12:37         44    34                  height:19.2
# 6   6 12:35         45    34                         <NA>
# 7   7 12:36         49    35                     speed:35
# 8   8 12:37         44    27                     speed:27
# 9   9 12:38         45    27                         <NA>
# 10 10 12:39         44    27                         <NA>
# 11 11 12:40         44    28           height:18,speed:28
# 12 12 12:41         40    29         speed:29,width:120.1
# 13 13 12:42         44    30 height:18.1,speed:30,with:50
# 14 14 12:43         55    30                  width:128.1
# 15 15 12:44         41    30                         <NA>