为什么并行R代码在具有更多内核的机器上不会更快?

时间:2017-03-10 16:14:01

标签: r foreach parallel-processing

我有一台 Lenovo W540 笔记本电脑:64 位 Windows 7,16 GB 内存,Intel(R) Core(TM) i7-4600M CPU @ 2.90 GHz,运行 64 位 R 3.2.2。

我还有另一台机器:64 位 Windows 7,16 GB 内存,处理器为 Intel(R) Xeon(R) CPU E5-2620 v3 @ 2.40 GHz。

我有一个并行运行的函数。在我的 4 核笔记本电脑上,我运行了以下代码:

library(splitstackshape)
library(foreach)
library(doParallel)

# Driver script: run 100 independent calls of RobotSimulation() on a local
# cluster and report the elapsed wall-clock time.

# Size the cluster: leave one core free for the rest of the system, but never
# ask for fewer than one worker. detectCores() returns NA when the core count
# cannot be determined, and `cores - 1` is 0 on a single-core host; either
# value would make makeCluster() fail.
# NOTE(review): detectCores() counts logical cores by default; use
# detectCores(logical = FALSE) to size the cluster by physical cores where the
# platform supports it.
cores <- detectCores()
n_workers <- if (is.na(cores)) 1L else max(1L, cores - 1L)
cl <- makeCluster(n_workers)
registerDoParallel(cl)

# Initialize variables
# NOTE(review): neither variable is read in this part of the script; they are
# kept because code outside this section may depend on them.
partsOrder <- list()
numSim <- 10

# start clock
start <- proc.time()

# Each iteration is one full simulation; `j` only counts replications.
SimResults <- foreach(j = 1:100) %dopar% {
  RobotSimulation()
}

# stop the clock before tearing the cluster down, so the reported time covers
# the simulations only and not the worker shutdown
end <- proc.time() - start

# stop using all cores
stopCluster(cl)

# elapsed (wall-clock) seconds
end[3]

我不明白的是:在我的4核笔记本电脑上,完成100次迭代需要30分钟;而在我的24核机器上,所需时间也相同。我可以在任务管理器中看到全部24个核心都在使用,并且有24个R进程在运行。核心是逻辑核心还是物理核心,对R来说有影响吗?

编辑:添加了RobotSimulation()代码和一些示例数据

# Example parts table: 4000 rows, each with a job label drawn from "a".."d"
# and ten cycle-time columns whose values are drawn from 1..10 with
# replacement. Built inside local() so the helpers do not leak into the
# calling environment.
partsList <- local({
  n_rows <- 4000
  job_labels <- c("a", "b", "c", "d")

  # One fresh vector of cycle times per call. The columns below are evaluated
  # top to bottom, so the random draws happen in column order.
  draw_cycle_times <- function() {
    sample.int(10, n_rows, replace = TRUE)
  }

  data.frame(
    JobNum = sample(job_labels, n_rows, replace = TRUE),
    DS.CT = draw_cycle_times(),
    C1.CT = draw_cycle_times(),
    C2.CT = draw_cycle_times(),
    C3.CT = draw_cycle_times(),
    C4.CT = draw_cycle_times(),
    C5D5.CT = draw_cycle_times(),
    C6D6.CT = draw_cycle_times(),
    C5D7.CT = draw_cycle_times(),
    C6D8.CT = draw_cycle_times(),
    C7CD.CT = draw_cycle_times()
  )
})

# Simulate one pass of every part through the conveyor line.
#
# Reads the global data frame `partsList` (one row per part). It must contain
# `JobNum` plus the ten `*.CT` cycle-time columns named below, and at least
# 234 rows, otherwise the sampling of the initial line fails.
#
# The parts are shuffled into a random feed order, the 234-slot line is
# pre-loaded, and the parts are then fed in one at a time. Each feed step puts
# the new part into slot 1 and shifts every part already on the line one slot
# further along; the part in the last slot drops off.
#
# Returns a list with two elements:
#   partsOrder - a one-element list holding `JobNum` in feed order
#   CT         - a named list (DS, D1..D9) of cycle-time vectors with one value
#                per feed step, read at that station's slot on the line
RobotSimulation <- function() {
  # Number of slots on the line.
  line_length <- 234L

  # Slot on the line at which each station sits, and the cycle-time column
  # that is read there.
  station_slot <- c(
    DS = 1L, D1 = 10L, D2 = 26L, D3 = 42L, D4 = 57L,
    D5 = 85L, D6 = 120L, D7 = 167L, D8 = 210L, D9 = 216L
  )
  station_column <- c(
    DS = "DS.CT", D1 = "C1.CT", D2 = "C2.CT", D3 = "C3.CT", D4 = "C4.CT",
    D5 = "C5D5.CT", D6 = "C6D6.CT", D7 = "C5D7.CT", D8 = "C6D8.CT",
    D9 = "C7CD.CT"
  )

  # randomize the dataset of parts and record the order
  parts <- partsList[sample(nrow(partsList)), ]
  partsOrder <- list(parts$JobNum)
  n_parts <- nrow(parts)

  # choose a sample of parts to populate the conveyor belts, using a fixed
  # seed so the chosen row positions are the same in every call.
  # The global RNG state is saved first and restored on exit. Without this,
  # set.seed() would leave every later call in the same process starting from
  # the same RNG state, so all later shuffles would be identical and the
  # replications would not be independent.
  # NOTE(review): the row positions are constant, but they index the freshly
  # shuffled `parts`, so the parts on the initial line still differ between
  # calls. Confirm whether sampling from `partsList` was intended.
  saved_seed <- get(".Random.seed", envir = globalenv(), inherits = FALSE)
  on.exit(
    assign(".Random.seed", saved_seed, envir = globalenv()),
    add = TRUE
  )
  set.seed(101)
  LineParts <- parts[sample(n_parts, line_length, replace = FALSE), ]

  # Cycle times seen at one slot over all feed steps.
  # After feed step i, slot k holds the part fed at step i - k + 1 when
  # i >= k, and otherwise the part that started in slot k - i of the initial
  # line. So the series at slot k is the first k - 1 initial-line values in
  # reverse order, followed by the first n_parts - k + 1 fed values. This
  # matches shifting the whole line once per part, without building a data
  # frame for every step.
  cycle_times_at <- function(slot, column) {
    from_initial_line <- LineParts[[column]][rev(seq_len(slot - 1L))]
    from_feed <- parts[[column]][seq_len(n_parts - slot + 1L)]
    c(from_initial_line, from_feed)
  }

  # find the cycle times at each robot for each time a new part went through
  otherstations_veclist <- Map(cycle_times_at, station_slot, station_column)

  # record results
  list(partsOrder = partsOrder, CT = otherstations_veclist)
}

0 个答案:

没有答案