Question

我想知道是否存在返回以下内容的矢量化方式：

我有一个vector =

x = c(-4,-3,-2,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,11,10,9,8,7,6,5,4,3,2,1,0,-1,-2,-1,0,1,2,3,4,5,6,7,8,9,10,11,12)

我希望得到一个相同长度的向量，这样当它超过5时，它会将其设置为1（TRUE），直到它低于0（FALSE）。我目前正在做一个for循环，如果上面的系列有大量的观察，我将永远这样做。

答案应该回复：

results = c(0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1)

有什么想法吗？

Answer 1

使用包zoo，您可以使用：

results2 <- na.locf(c(NA,1,0)[(x>=5) + 2*(x<=0) + 1],na.rm=FALSE)

identical(results2, results)
#[1] TRUE

Answer 2

您可以使用逻辑值识别更改点并查找该状态的更改：

findChangePoint <- function(y,cp){
  results <- 0*y
  state = 0 
  i = 1
  while (i <= length(y)){
    if((state ==0 ) & (y[i] >max(cp))){
      state = 1
    }
    if ((state == 1) && (y[i] <= min(cp))){
      state = 0
    }
    results[i] = state
    i = i+1
  }
  return(results)
}

然后我们可以创建一个函数来绘制它：

plotChangePoints <- function(y,cp){
  p.state <- ggplot(data = data.frame(x = seq(1,length(y)),
                                      y=y,
                                      state = findChangePoint(y,cp))) +
    geom_point(aes(x = x,
                   y = y)) +
    geom_point(aes(x = x,
                  y = state),
               color = "red")    
  print(p.state)
  return(p.state)
}

所以现在当你这样做时，使用建议的更复杂的数据：

y <- c(-0.6, -0.3, 0.5, 0.6, 3, 4.1, 6.7, 3.7, 7.5,
     4.1, 6.8, 4.8, 3.3, 1.6, 3.1, 2, 1.3, 2.9, 2.8, 1.9,
     0, -0.5, -0.6, 0.3, 1.9, 5.1, 6.4)
# specify the change points we will use:
cp=c(5,1)
plotChangePoints(y,cp)

你得到这个，黑点是数据，红色是状态（即'切换'或不是）

enter image description here

而且，如果你想要的只是结果，请使用：

results <- findChangePoint(y,cp)

Answer 3

这非常难看，但它似乎适用于非常复杂的场景：

entex <- function(x,uplim,lwlim) {

  result <- vector("integer",0)
  upr <- which(x>=uplim)
  lwr <- which(x<=lwlim)

  while(length(upr) > 0) {
    if(min(upr) > max(lwr)) {
      result <- unique(c(result,upr))
      upr <- upr[upr > max(result)]
    } else
    {
      result <- unique(c(result,upr[1]:(min(lwr[lwr>upr[1]])-1)))
      lwr <- lwr[lwr > max(result)]
      upr <- upr[upr > max(result)]
    }
  }
  result
}

显示它有效：

plot(x,pch=19,type="o")
abline(h=c(0,5),col="lightblue")
result <- entex(x,5,0)
abline(v=result,col="red")

enter image description here

使用更复杂的x示例：

x <- c(-0.6, -0.3, 0.5, 0.6, 3, 4.1, 6.7, 3.7, 7.5, 4.1, 6.8, 4.8, 3.3,
       1.6, 3.1, 2, 1.3, 2.9, 2.8, 1.9, 0, -0.5, -0.6, 0.3, 1.9, 5.1, 6.4)

enter image description here

Answer 4

更新：编辑，测试，＆amp;已添加基准。

（抱歉我昨天无法测试）

这是一个基本上纯逻辑比较的解决方案，比zoo

快20％

identical(results, UpAndDown(x))
# [1] TRUE

## 2,000 iterations, less than 0.1 seconds. 
> system.time(for(i in 1:2000) UpAndDown(x))
   user  system elapsed 
  0.080   0.001   0.082 

UpAndDown <- function(x, lowBound=0, upBound=5, numeric=TRUE) {
  ## This gets most of it
  high <-  (x >= upBound)
  low  <-  (x <= lowBound)

  res <- high & !low

  ## This grabs the middle portions
  fvs <- which(x==upBound)  
  zrs <- which(x==lowBound) 

  # The middle spots are those where zrs > fvs
  m <- which(zrs > fvs)

  # This is only iterating over a vector of a handufl of indecies
  #  It's not iterating over x
  mids <- unlist(lapply(m, function(i) seq(fvs[i], zrs[i]-1)), use.names=FALSE)
  res[mids] <- TRUE

  if (numeric)
    res <- as.numeric(res)

  # logical
  return(res)

}

基准：

# Small x
microbenchmark(UpAndDown=UpAndDown(x), Entex=entex(x,5,0), ZOO=na.locf(c(NA,1,0)[(x==5) + 2*(x<=0) + 1],na.rm=FALSE))

Unit: microseconds
      expr    min      lq  median      uq     max neval
 UpAndDown 31.573 36.1965 42.4240 46.9765 146.599   100
     Entex 40.113 46.1030 51.9605 57.3170 114.269   100
       ZOO 60.169 68.7335 78.2480 83.0360 176.159   100

更大的x：

# With Larger x

x <- c(seq(-10, 10), seq(11, -7), seq(-8, 15), seq(16, -28), seq(-29, 100), seq(101, -9)) 
x <- c(x, x, x)
length(x)
# [1] 1050

## CONFIRM VALUES
identical(UpAndDown(x), na.locf(c(NA,1,0)[(x==5) + 2*(x<=0) + 1]))
# [1] TRUE

## Benchmark
microbenchmark(
    UpAndDown=UpAndDown(x), 
    fcp=findChangePoint(x, c(5,1)), 
    Entex=entex(x,5,0), 
    ZOO=na.locf(c(NA,1,0)[(x==5) + 2*(x<=0) + 1],na.rm=FALSE)
  )

Unit: microseconds
      expr      min        lq    median        uq       max neval
 UpAndDown  141.149  162.9125  183.8080  206.9560   403.528   100
       fcp 5719.692 6056.1760 6379.4355 7376.7370 21456.502   100
     Entex  416.570  446.8780  469.7845  501.0985   795.853   100
       ZOO  192.449  209.1260  249.3805  281.4820   489.416   100

注意：如果期望非整数值（或者，通常缺少确切的边界数，例如0＆amp; 5），则使用以下定义

  ## ----------------------------##
    fvs <- which(high)
    zrs <- which(low)

    # This is only iterating over a vector of a handufl of indecies
    #  It's not iterating over x
    mids <- unlist(sapply(fvs, function(x) {
                                Z <- x<zrs; 
                                if (any(Z)) 
                                  seq(x, zrs[min(which(Z), na.rm=TRUE)]-1)
                            }
                  ), use.names=FALSE)

Answer 5

这真是一个很长的评论...... 让我感到震惊的是，这就是施密特触发器（opamp）所做的。这让我想知道是否有办法运行具有可重置条件的while循环。

limits <- c(5,0)
flop = 1
threshold<-limits[1]
for(j in 1:length(x) {

 while(x*(-1^(1-flop) < threshold) { 
do_stuff
}
threshold<-limits[flop+1]
flop <- !flop
}

我可能有一些负面的迹象，但你明白了。

Answer 6

您可以使用rle()并完全避免为/ while循环编写：

x <- c(-4,-3,-2,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,11,10,9,8,7,6,5,4,3,2,1,0,-1,-2,-1,0,1,2,3,4,5,6,7,8,9,10,11,12)

result <- rep(99, length(x))
result[x >= 5] <- 1
result[x <= 0] <- 0

result
#  [1]  0  0  0  0  0 99 99 99 99  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 99
# [26] 99 99 99  0  0  0  0  0 99 99 99 99  1  1  1  1  1  1  1  1

# Run-length-encode it
result_rle <- rle(result)
# Find the 99's and replace them with the previous value
missing_idx <- which(result_rle$values == 99)
result_rle$values[missing_idx] <- result_rle$values[missing_idx - 1]
# Inverse of the RLE
result <- inverse.rle(result_rle)

# Check value
expected <- c(0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1)
identical(result, expected)
# TRUE

请注意，如果第一个值介于0和5之间，则会出错，但添加检查很简单。在这种情况下，您还需要确定您想要的行为。

矢量化的进入和退出

6 个答案:

更新：编辑，测试，＆amp;已添加基准。

基准：

更大的x：