Question

我有几个目录，每个目录包含700多个二进制编码的栅格，我需要对每个目录的输出栅格求平均。但是，我目前是在for循环中逐个创建栅格，然后把新创建的栅格重新加载到R中求和，以得到月降雨总量。

然而，由于我并不需要单独的栅格，只需要平均栅格，我猜想可以在一个循环内完成所有工作：不保存各个栅格，只输出平均栅格。但我不知道该如何在R中编写这样的程序。

# Convert every compressed CMORPH binary file of one month into a GeoTIFF
# clipped to the Levant, then sum the clipped rasters into a monthly total.
# The raster package must be attached and a `Levant` polygon object must be
# in scope; neither is set up in this snippet.
setwd("~/Desktop/CMORPH/Levant-Clip/200001")

dir.output <- "~/Desktop/CMORPH/Levant-Clip/200001" ### change as needed to give output location
# `pattern` is a regular expression, not a shell glob, so anchor the extension.
path <- list.files("~/Desktop/CMORPH/MonthlyCMORPH/200001",
                   pattern = "\\.bz2$", full.names = TRUE, recursive = TRUE)

# The extent is the same for every file, so build it once.
e <- extent(-180, 180, -90, 83.6236) ### choose the extent based on the netcdf file info

for (i in seq_along(path)) {
  files <- bzfile(path[i], "rb")
  # 4-byte little-endian floats, one value per grid cell
  data <- readBin(files, what = "double", endian = "little",
                  n = 4948 * 1649, size = 4)
  close(files) # release the connection; R only allows a limited number open
  data[data == -999] <- NA # convert missing data from -999 (CMORPH notation) to NAs
  y <- matrix(data, ncol = 1649, nrow = 4948)
  r <- raster(y)
  tr <- t(r) # transpose
  re <- setExtent(tr, e) ### set the extent to the raster
  ry <- flip(re, direction = "y")
  projection(ry) <- "+proj=longlat +datum=WGS84 +ellps=WGS84"
  C_Lev <- crop(ry, Levant) ### Clip to Levant
  M_C_Lev <- mask(C_Lev, Levant)
  # file.path() inserts the separator that paste(..., sep = '') left out;
  # the basename keeps the output named like the original file.
  writeRaster(M_C_Lev, file.path(dir.output, basename(path[i])),
              format = "GTiff", overwrite = TRUE)
}

# Monthly total: stack the clipped rasters written above and sum them.
sum_name <- "sum200001.tif"
raspath <- list.files(dir.output, pattern = "\\.tif$",
                      full.names = TRUE, recursive = TRUE)
# Skip a monthly total left over from a previous run so it is not summed again.
raspath <- raspath[basename(raspath) != sum_name]
rasstk <- stack(raspath)
sum200001 <- sum(rasstk)
# Write the object that was actually computed (the original referenced an
# undefined `avg200001`) under its own name, so that the last daily raster
# is not overwritten.
writeRaster(sum200001, file.path(dir.output, sum_name),
            format = "GTiff", overwrite = TRUE)

目前，此代码需要大约75分钟才能执行，而且我还有大约120个目录，我正在寻找更快的解决方案。

感谢所有人以及任何意见和建议。最好的，埃文

Answer 1

在阐述我之前的评论时，您可以尝试：

# Build the clipped daily rasters in memory (nothing is written per file),
# stack them and write only their mean.
# The raster package must be attached and a `Levant` polygon object must be
# in scope; neither is set up in this snippet.
setwd("~/Desktop/CMORPH/Levant-Clip/200001")

dir.output <- "~/Desktop/CMORPH/Levant-Clip/200001" ### change as needed to give output location
# `pattern` is a regular expression, not a shell glob, so anchor the extension.
path <- list.files("~/Desktop/CMORPH/MonthlyCMORPH/200001",
                   pattern = "\\.bz2$", full.names = TRUE, recursive = TRUE)
stopifnot(length(path) > 0)

# The extent is the same for every file, so build it once.
e <- extent(-180, 180, -90, 83.6236) ### choose the extent based on the netcdf file info

# Preallocate instead of growing the list on every iteration.
raster_list <- vector("list", length(path))
for (i in seq_along(path)) {
  files <- bzfile(path[i], "rb")
  # 4-byte little-endian floats, one value per grid cell
  data <- readBin(files, what = "double", endian = "little",
                  n = 4948 * 1649, size = 4)
  close(files) # release the connection; R only allows a limited number open
  data[data == -999] <- NA # convert missing data from -999 (CMORPH notation) to NAs
  y <- matrix(data, ncol = 1649, nrow = 4948)
  r <- raster(y)
  tr <- t(r) # transpose
  re <- setExtent(tr, e) ### set the extent to the raster
  ry <- flip(re, direction = "y")
  projection(ry) <- "+proj=longlat +datum=WGS84 +ellps=WGS84"
  C_Lev <- crop(ry, Levant) ### Clip to Levant
  M_C_Lev <- mask(C_Lev, Levant)
  raster_list[[i]] <- M_C_Lev
}

rasstk <- stack(raster_list, quick = TRUE) # OR rasstk <- brick(raster_list, quick = TRUE)
avg200001 <- mean(rasstk)
# As in the original, the output is named after the last input file, but
# without depending on the leftover loop index; file.path() inserts the
# separator that paste(..., sep = '') left out.
writeRaster(avg200001, file.path(dir.output, basename(path[length(path)])),
            format = "GTiff", overwrite = TRUE)

使用stack中的 quick = TRUE 选项绝对可以加快速度，特别是当你有很多栅格时。

另一种可能性是首先计算平均值，然后执行“空间处理”。例如：

# Average the raw vectors first, then do the spatial processing once on the
# averaged grid. Uses `path` and `dir.output` as defined in the earlier
# snippet, the raster package, and a `Levant` polygon object in scope.
n_cells <- 4948 * 1649
stopifnot(length(path) > 0)

totdata <- numeric(n_cells)   # running sum of the valid values per cell
num_nonNA <- numeric(n_cells) # number of valid values per cell

for (i in seq_along(path)) {
  files <- bzfile(path[i], "rb")
  # 4-byte little-endian floats, one value per grid cell
  data <- readBin(files, what = "double", endian = "little",
                  n = n_cells, size = 4)
  close(files) # release the connection; R only allows a limited number open
  stopifnot(length(data) == n_cells) # a short read would misalign the cells
  data[data == -999] <- NA # convert missing data from -999 (CMORPH notation) to NAs

  # Accumulate in place; this avoids building an n_cells x 2 matrix per file.
  valid <- !is.na(data)
  totdata[valid] <- totdata[valid] + data[valid]
  # We have to count the number of "valid" entries so that the average is correct!
  num_nonNA <- num_nonNA + valid
}

avg_data <- totdata / num_nonNA # Compute the average
avg_data[num_nonNA == 0] <- NA  # cells that were never valid: NA, not NaN

# Now do the "spatial" processing

y <- matrix(avg_data, ncol = 1649, nrow = 4948)
r <- raster(y)
e <- extent(-180, 180, -90, 83.6236) ### choose the extent based on the netcdf file info
tr <- t(r) # transpose
re <- setExtent(tr, e) ### set the extent to the raster
ry <- flip(re, direction = "y")
projection(ry) <- "+proj=longlat +datum=WGS84 +ellps=WGS84"
# Crop the georeferenced raster; the original passed the plain numeric
# vector `avg_data` here.
C_Lev <- crop(ry, Levant) ### Clip to Levant
M_C_Lev <- mask(C_Lev, Levant)
# As in the original, the output is named after the last input file;
# file.path() inserts the separator that paste(..., sep = '') left out.
writeRaster(M_C_Lev, file.path(dir.output, basename(path[length(path)])),
            format = "GTiff", overwrite = TRUE)

这可能更快或更慢，取决于您裁剪原始数据的“多少”。

HTH，

洛伦佐

Answer 2

我添加了另一个答案来澄清和简化一些事情，这也与聊天中的评论有关。下面的代码应该能按照您的要求执行：即循环遍历文件，读取“数据”，计算所有文件的总和，并将其转换为具有指定尺寸的栅格。

请注意，为了便于测试，我用一个简单的1到720的循环代替了你对文件名的循环，并用创建一个与你的数据长度相同、填充了1到4的值以及一些NA的数组来代替文件读取！

# Timing test with simulated input: accumulate 720 fake arrays of
# 4948 * 1649 values into a running sum, then turn the sum into a raster.
n_cells <- 4948 * 1649
totdata <- array(dim = n_cells) # dummy array holding the running sum

for (i in seq_len(720)) {
  message("Working on file: ", i)
  # A "fake" array of n_cells values is built each time to stand in for
  # reading one file.
  sim <- array(rep(c(1, 2, 3, 4), n_cells / 4), dim = n_cells)
  sim[1:1000] <- -999                # plant some missing-data markers
  sim <- replace(sim, sim == -999, NA) # -999 becomes NA

  # Add the current array to the cumulative sum so far, skipping NAs
  totdata <- rowSums(cbind(totdata, sim), na.rm = TRUE)
}

# Reshape the summed vector to a matrix and convert it to a raster
grid <- matrix(totdata, nrow = 4948, ncol = 1649)
e <- extent(-180, 180, -90, 83.6236) ### choose the extent based on the netcdf file info
# transpose, set the extent, then flip along y
ry <- flip(setExtent(t(raster(grid)), e), direction = "y")
projection(ry) <- "+proj=longlat +datum=WGS84 +ellps=WGS84"

这会产生一个“正确的”栅格：

> ry
class       : RasterLayer 
dimensions  : 1649, 4948, 8159252  (nrow, ncol, ncell)
resolution  : 0.07275667, 0.1052902  (x, y)
extent      : -180, 180, -90, 83.6236  (xmin, xmax, ymin, ymax)
coord. ref. : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0 
data source : in memory
names       : layer 
values      : 0, 2880  (min, max)

其中包含各个数组的总和：你可以注意到最大值是720 * 4 = 2880（唯一需要注意的是：如果某个单元始终为NA，你会得到0而不是NA）

在我的笔记本电脑上，大约需要5分钟！

在实践中：

为避免内存问题，我没有把所有数据都读入内存。你的每个数组大约有64MB，所以我无法把它们全部加载后再求和（除非我有50 GB的闲置RAM——即便如此也会很慢）。我改为利用加法的结合律，在每次循环中计算“累积”和。这样，任一时刻你只需要两个含约800万个元素的数组：从文件“i”中读取的那个，以及保存当前累积和的那个。
为避免不必要的计算，我直接对读取二进制文件得到的一维数组求和。你不需要在循环中把它重构为矩阵，因为可以在最后对“求和后”的数组做这一步，然后再把它转换为矩阵形式。

希望这对你有用，也希望我没有遗漏什么明显的东西！

据我所知，如果使用这种方法仍然很慢，那么问题出在别的地方（例如数据读取：对于720个文件，每个文件读取花费3秒就意味着大约35分钟的处理时间）。

HTH，

洛伦佐

如何在创建栅格的for循环中对栅格求平均？[R]

2 个答案: