计算直到达到值的行数,并在数据集内的特定范围内执行此操作

时间:2015-03-29 04:05:49

标签: r excel matlab

我是初学R用户(不完全确定它是否是最适合此用途的语言),并且很难找到我认为可能相对容易的解决方案。

编辑 - 我的数据的前60行如下所示:

structure(list(Filename = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L), .Label = c("BJB1NRL.mat", "DJG1NRL.mat", 
"DMA1NRL.mat", "DWS1NRL.mat", "EJC1NRL.mat", "FMB1NRL.mat", "GPC1NRL.mat", 
"GZZ1NRL.mat", "JMC1NRL.mat", "JTH1NRL.mat", "KAN1NRL.mat", "MAS1NRL.mat", 
"MFM1NRL.mat", "MJU1NRL.mat", "OVK1NRL.mat", "PCA1NRL.mat", "RHG1NRL.mat", 
"RHM1NRL.mat", "RJS1NRL.mat", "SIS1NRL.mat", "SXV1NRL.mat", "TXN1NRL.mat"
), class = "factor"), Label = structure(c(1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "V", class = "factor"), 
    seg_Start = c(231.537, 231.537, 231.537, 231.537, 231.537, 
    231.537, 231.537, 231.537, 231.537, 231.537, 231.537, 231.537, 
    231.537, 231.537, 231.537, 231.537, 231.537, 231.537, 231.537, 
    231.537, 231.537, 231.537, 554.141, 554.141, 554.141, 554.141, 
    554.141, 554.141, 554.141, 554.141, 554.141, 554.141, 554.141, 
    554.141, 554.141, 554.141, 554.141, 554.141, 554.141, 554.141, 
    554.141, 554.141, 554.141, 554.141, 554.141, 554.141, 554.141, 
    554.141, 554.141, 554.141, 554.141, 554.141, 554.141, 554.141, 
    554.141, 554.141, 554.141, 1081.457, 1081.457, 1081.457), 
    seg_End = c(444.039, 444.039, 444.039, 444.039, 444.039, 
    444.039, 444.039, 444.039, 444.039, 444.039, 444.039, 444.039, 
    444.039, 444.039, 444.039, 444.039, 444.039, 444.039, 444.039, 
    444.039, 444.039, 444.039, 887.246, 887.246, 887.246, 887.246, 
    887.246, 887.246, 887.246, 887.246, 887.246, 887.246, 887.246, 
    887.246, 887.246, 887.246, 887.246, 887.246, 887.246, 887.246, 
    887.246, 887.246, 887.246, 887.246, 887.246, 887.246, 887.246, 
    887.246, 887.246, 887.246, 887.246, 887.246, 887.246, 887.246, 
    887.246, 887.246, 887.246, 1269.05, 1269.05, 1269.05), t_ms = c(230L, 
    240L, 250L, 260L, 270L, 280L, 290L, 300L, 310L, 320L, 330L, 
    340L, 350L, 360L, 370L, 380L, 390L, 400L, 410L, 420L, 430L, 
    440L, 550L, 560L, 570L, 580L, 590L, 600L, 610L, 620L, 630L, 
    640L, 650L, 660L, 670L, 680L, 690L, 700L, 710L, 720L, 730L, 
    740L, 750L, 760L, 770L, 780L, 790L, 800L, 810L, 820L, 830L, 
    840L, 850L, 860L, 870L, 880L, 890L, 1080L, 1090L, 1100L), 
    CPP = c(0, 0, 19.553, 16.063, 18.09, 21.547, 21.37, 21.616, 
    18.125, 20.936, 20.134, 23.642, 23.319, 24.674, 22.299, 22.587, 
    27.477, 25.924, 20.166, 20.195, 16.807, 18.189, 16.843, 16.07, 
    20.384, 26.11, 26.536, 28.891, 26.769, 27.622, 28.303, 24.373, 
    22.043, 23.579, 26.797, 26.384, 29.34, 31.437, 31.531, 24.404, 
    22.396, 26.416, 28.531, 26.735, 28.796, 28.494, 27.177, 27.325, 
    26.273, 29.066, 24.344, 21.124, 28.363, 24.193, 19.815, 11.897, 
    14.125, 0, 0, 22.398)), .Names = c("Filename", "Label", "seg_Start", 
"seg_End", "t_ms", "CPP"), row.names = c(NA, 60L), class = "data.frame")

我要做的事情手动完成起来很简单,但我在编写脚本让它自动完成时遇到了困难。对于每个时间段(即231-444,然后是1081-1269等),我需要知道最后一列(CPP)中的值达到阈值(例如28)所需的毫秒数。也就是说,我需要计算从该段起点(t_ms = seg_Start)开始,直到最后一列(CPP)中的值达到此阈值为止的行数,因为每行代表10ms。如果值始终没有达到阈值,则应给出值0。

到目前为止,我的代码主要是尝试把数据集拆分开来以便分析。它可能效率低下,更令人担忧的是,它可能并不正确。

编辑 - 澄清 - 我需要针对每个不同的“Filename”,分别分析每个“段”内的CPP值。一个“段”是指t_ms的值介于seg_Start和seg_End之间的那些行。

例如,对于第一个文件名的第一个段,我的答案应该是“280ms”——每行10毫秒。CPP在第一个seg_Start到seg_End之间的时间段中第一次达到28,是在第28行(值= 28.891)。10 x 28 = 280ms。

最终编辑:删除了无关的数据集。保留下来的部分都只与本问题相关。

1 个答案:

答案 0 :(得分:0)

编辑:

我编辑了您的数据以包含两个不同的文件名,如下所示。

# Reassign rows 15-17 to a second file name so the sample data covers two files.
df$Filename[15:17] <- "DMA1NRL.mat"

time.2.threshold是一个带有三个参数的函数:df - data,threshold - CPP threshold,time_factor - 以ms为单位的数字。

在函数内部,有三个循环。第一个循环按文件名筛选数据。第二个循环处理seg_Start中的各个取值,第三个(最内层)循环处理第二个循环得到的数据框:最初的版本会先按CPP列对该数据框排序(该排序后来已被停用,因此保持原有的行顺序),然后找到CPP达到或超过阈值的最小行号(行索引)。

time_2_threshold是time_factor * rows_CPP_to_threshold

最后,time.2.threshold函数返回一个数据框,其中包含整个数据框的文件名,seg_Start,seg_End,rows_CPP_to_threshold,total_rows和time_2_threshold值。

我仍在努力理解您的需求,因为您的问题表述得还不够清楚,但代码中的各个步骤是正确的。也许有人会建议用更符合R习惯的写法来改写这段代码以提高性能,但我在这里没有这样做,因为目前还不清楚您的具体期望。希望这对您有帮助!

#' Time until CPP first reaches a threshold, per file and segment
#'
#' Splits `df` by `Filename`, then by `seg_Start`, then by `seg_End`, and for
#' each resulting segment finds the 1-based position of the first row (in the
#' existing row order) whose `CPP` is at or above `threshold`.
#'
#' @param df A data frame with columns `Filename`, `seg_Start`, `seg_End` and
#'   `CPP`. Required; the previous self-referential default (`df = df`) could
#'   never be evaluated and has been dropped.
#' @param threshold CPP value that must be reached (`CPP >= threshold`).
#' @param time_factor Multiplier applied to the row count to obtain
#'   `time_2_threshold` (e.g. 10 when each row represents 10 ms).
#' @return A data frame with one row per (filename, seg_Start, seg_End)
#'   combination and columns `filename`, `seg_Start`, `seg_End`,
#'   `rows_CPP_to_threshold`, `total_rows` and `time_2_threshold`.
#'   `rows_CPP_to_threshold` and `time_2_threshold` are `NA` when the
#'   threshold is never reached in that segment. A zero-row data frame with
#'   the same columns is returned when there are no segments.
time.2.threshold <- function(df, threshold = 25, time_factor = 10) {
  stopifnot(is.data.frame(df))

  required_cols <- c("Filename", "seg_Start", "seg_End", "CPP")
  missing_cols <- setdiff(required_cols, names(df))
  if (length(missing_cols) > 0) {
    stop(
      "df is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Template returned as-is when no segment is found.
  empty_result <- data.frame(
    filename = character(),
    seg_Start = numeric(),
    seg_End = numeric(),
    rows_CPP_to_threshold = numeric(),
    total_rows = numeric(),
    time_2_threshold = numeric(),
    stringsAsFactors = FALSE
  )

  # Loop-invariant: candidate segment starts are taken from the whole data
  # set (this fixes the output row order); combinations absent from a given
  # file simply yield no rows below.
  all_starts <- unique(df$seg_Start)

  # Collect one-row results in a list and bind once at the end instead of
  # growing a data frame with rbind() on every iteration.
  pieces <- list()

  for (file_name in as.character(unique(df$Filename))) {
    file_rows <- df[which(df$Filename == file_name), , drop = FALSE]

    for (seg_start in all_starts) {
      start_rows <- file_rows[
        which(file_rows$seg_Start == seg_start), ,
        drop = FALSE
      ]

      # unique() of an empty column is empty, so segments that do not exist
      # for this file are skipped without an explicit length check.
      for (seg_end in unique(start_rows$seg_End)) {
        segment <- start_rows[
          which(start_rows$seg_End == seg_end), ,
          drop = FALSE
        ]

        # Rows are deliberately NOT sorted by CPP: the position must reflect
        # the original (time) order of the rows within the segment.
        # which() drops NA and returns ascending indices, so the first
        # element is the earliest row at or above the threshold. Testing
        # only the length avoids applying `&&` to a vector of length > 1,
        # which is an error in R >= 4.3.
        hits <- which(segment$CPP >= threshold)
        first_hit <- if (length(hits) > 0) hits[1L] else NA_integer_

        pieces[[length(pieces) + 1L]] <- data.frame(
          filename = file_name,
          seg_Start = seg_start,
          seg_End = seg_end,
          rows_CPP_to_threshold = first_hit,
          total_rows = nrow(segment),
          time_2_threshold = time_factor * first_hit,
          stringsAsFactors = FALSE
        )
      }
    }
  }

  if (length(pieces) == 0) {
    return(empty_result)
  }
  do.call(rbind, pieces)
}

# Example call: CPP threshold of 15, with time_factor = 10.
df_threshold <- time.2.threshold(df = df, threshold = 15, time_factor = 10)

输出

df_threshold <- time.2.threshold(df = df, threshold = 15, time_factor = 10)

    print(df_threshold)
     filename seg_Start  seg_End rows_CPP_to_threshold total_rows time_2_threshold
1 BJB1NRL.mat   231.537  444.039                     3         19               30
2 BJB1NRL.mat   554.141  887.246                     1         35               10
3 BJB1NRL.mat  1081.457 1269.050                     3          3               30
4 DMA1NRL.mat   231.537  444.039                     1          3               10

在注释掉对CPP列的排序之后:

df_threshold <- time.2.threshold(df = df, threshold = 25, time_factor = 10)
print(df_threshold)
     filename seg_Start  seg_End rows_CPP_to_threshold total_rows time_2_threshold
1 BJB1NRL.mat   231.537  444.039                    15         19              150
2 BJB1NRL.mat   554.141  887.246                     4         35               40
3 BJB1NRL.mat  1081.457 1269.050                    NA          3               NA
4 DMA1NRL.mat   231.537  444.039                     3          3               30