Question

我想从x中选择一个元素，从y中选择一个元素（x和y是互斥的），并且从x或y中选择一个尚未被选中的元素。然后，我想重复该过程指定的次数，并将每个试验的结果存储在数据帧中。（注意：我不想尝试找到所有可能的组合）

以下代码有效但随着试验次数的增加而显着减慢。

x <- 1:4
y <- 5:8
z <- c(x, y) #edited - previous code read a, b in place of x, y
trials <- 5
sel <- data.frame()
set.seed(123)
for (i in 1:trials){
    x_sel <- sample(x, 1)
    y_sel <- sample(y, 1)
    rem <- z[!(z %in% c(x_sel, y_sel))]
    z_sel <- sample(rem, 1)
    sel <- rbind(sel, cbind(x_sel, y_sel, z_sel))
}

Answer 1

这可能会有点快，但我怀疑它是最快的。当然，我认为Rcpp会是最快的。

for

只有10k样本，这似乎比你的for循环快37倍（这主要是低效的，因为在for上一次添加一个东西，而不是<script src="http://code.jquery.com/jquery-2.1.4.min.js"></script> <script> $().ready(function(){ var counter = 0; $(".cube").each(function(idx,obj) { counter++; if(counter <= 5 ) { $(obj).addClass('newstyle'); } if(counter == 10) counter = 0; }) }); </script>中固有的东西环）。这个和更明智的{{1}}循环之间的区别可能要小得多。

Answer 2

我的方法并不优雅，但在许多试验很大时似乎是有效的。为了证明这一点，我创造了3个函数：f1 - 你的，f2 - joran的，f3 - 我的

library(microbenchmark)

f1 <- function() {
   x <- 1:4
   y <- 5:8
   z <- c(x, y) #edited - previous code read a, b in place of x, y
   trials <- 5000
   sel <- data.frame()
   set.seed(123)
   for (i in 1:trials) {
      x_sel <- sample(x, 1)
      y_sel <- sample(y, 1)
      rem <- z[!(z %in% c(x_sel, y_sel))]
      z_sel <- sample(rem, 1)
      sel <- rbind(sel, cbind(x_sel, y_sel, z_sel))
   }
   return(sel)
}

f2 <- function() {
   set.seed(123)
   x <- 1:4
   y <- 5:8
   z <- c(x, y)
   trials <- 5000

   xval <- sample(x, size = trials, replace = TRUE)
   yval <- sample(y, size = trials, replace = TRUE)
   zval <-
      mapply(
         FUN = function(x, y, z) {
            sample(setdiff(z, c(x, y)), 1)
         },
         x = xval,
         y = yval,
         MoreArgs = list(z = z)
      )

   result <- data.frame(xval = xval,
                        yval = yval,
                        zval = zval)
   return(result)
}


f3 <- function() {
   x <- 1:4
   y <- 5:8
   z <- c(x, y) #edited - previous code read a, b in place of x, y
   trials <- 5000
   set.seed(123)
   x_sel <- sample(x, trials, replace = TRUE)
   y_sel <- sample(y, trials, replace = TRUE)
   z_mac <- matrix(z,
                   nrow = trials,
                   ncol = length(z),
                   byrow = TRUE)
   take <- z_mac != x_sel & z_mac != y_sel
   z_sel <- t(matrix(t(z_mac)[t(take)], ncol = trials))
   take <- sample(1:ncol(z_sel), size = trials, replace = TRUE)
   cbind(x_sel, y_sel, z_sel = z_sel[cbind(1:trials, take)])
}


microbenchmark(f1(), f2(), f3(), times = 10L)

Unit:milliseconds
expr         min          lq        mean      median          uq         max neval
f1() 2193.448113 2248.442450 2258.626023 2258.135072 2267.333956 2346.457082    10
f2()  205.124501  208.672947  213.520267  212.208095  219.397101  222.990083    10
f3()    2.463567    2.491762    2.570517    2.512588    2.603582    2.827863    10

我的f3功能比f1快856倍，比f2快83倍。当我们考虑oryginal问题（试验= 5）然后

> microbenchmark(f1(), f2(), f3(), times = 10L)
Unit: microseconds
 expr      min       lq      mean    median       uq      max neval
 f1() 1215.924 1268.790 1296.7610 1300.5095 1321.015 1370.998    10
 f2()  587.937  590.500  619.6248  612.9285  638.881  687.261    10
 f3()   68.886   78.819   86.7652   81.2225   91.315  116.947    10

有没有办法让我避免使用for循环或提高效率？

2 个答案: