按子集(每次一个子集)分别计算相关性

时间:2014-02-27 16:24:04

标签: r

也许这个问题已经有人问过,但我没有找到对我有帮助的内容。

我有一个数据框,它是一个 40 年的时间序列,共 4 列:第一列是年份(数值),第二列是月份(数值,1 到 12),第三列和第四列分别是 place1 和 place2 的降水量。我想用 cor() 对 place1 和 place2 的降水量做相关性分析,但希望每 5 年为一组分别计算。此外,该序列中含有 NA 值。有没有办法做到这一点?

以下是一些示例数据:

# Example data: monthly precipitation at two places for 1940-1959.
# Rows are ordered year by year, January to December within each year, so
# every line of the literals below holds the 12 monthly values of one year.
year <- rep(1940:1959, each = 12L)
month <- rep(seq_len(12), times = 20)

# Precipitation at place 1 (contains one missing value, December 1940).
place1 <- c(
  14.7, 26.3, 10.2, 132.4, 286.3, 158.2, 72, 99.5, 217.6, 267.9, 80.3, NA,
  38.9, 20.9, 29.1, 312.2, 110.1, 245, 163.2, 38.3, 251.3, 95.3, 89.4, 13.5,
  13.3, 49.1, 26.9, 105.6, 188.7, 186.1, 140.5, 241.6, 143.2, 156.9, 37.4, 29.8,
  19.6, 27.3, 80.7, 102.9, 222.5, 88.4, 59.1, 107.3, 119.5, 451.2, 52.2, 0,
  14.3, 7.9, 55.4, 31.1, 152.2, 190.7, 251, 200.2, 158.7, 93, 44.3, 40.3,
  18.6, 15.2, 11.4, 110.3, 377.9, 42.3, 68.2, 289.5, 219.7, 133.2, 114.4, 115.2,
  15.3, 14, 86.7, 66.1, 204.1, 33.9, 51.8, 83, 238.8, 231.4, 70.6, 41.7,
  99.5, 176.4, 1.3, 63, 238.2, 48.6, 82.6, 66.9, 257, 141.4, 14.5, 35.5,
  28.6, 32.5, 1.3, 50.7, 300.8, 74.1, 110.9, 64.8, 128, 309.9, 71.1, 22.6,
  2.5, 2.3, 57.6, 24.4, 171.9, 91, 116.3, 224.3, 123.5, 149.1, 17.8, 26,
  62.8, 47.1, 9.6, 38.1, 72.2, 141.2, 52.2, 110.7, 246.6, 330.5, 8.6, 38.6,
  57.5, 26.7, 0, 210, 601.2, 79.4, 166.2, 128.8, 133.5, 81.8, 42, 30.4,
  12.5, 20.3, 27.7, 191.6, 223.6, 63.5, 175.3, 42.3, 277.9, 60.9, 26.5, 9.7,
  59.7, 9.4, 40.5, 70.1, 307.1, 163.5, 230, 51.8, 160.4, 115.9, 54.4, 25.3,
  15.3, 67.6, 77.9, 108.8, 283.5, 297.2, 99.9, 103.4, 277.4, 474.6, 91.8, 23.9,
  43.4, 12.7, 3, 179.5, 259.4, 154.3, 201.1, 363.3, 253.7, 257.9, 38.2, 71.3,
  29.5, 95.1, 128.2, 36.7, 137.8, 182.6, 85.8, 23.6, 48.7, 218.1, 30.4, 42.3,
  35, 43.9, 30, 58.2, 139.2, 99, 39.6, 13.9, 152.6, 117.6, 39, 25.9,
  169.6, 31.2, 63.1, 124.2, 377.4, 279.8, 168.2, 100, 191.9, 108.6, 55.2, 27.7,
  16, 8.1, 5.6, 75.7, 38.8, 131.7, 131, 135.9, 97.4, 188.9, 304.8, 34.6
)

# Precipitation at place 2.
place2 <- c(
  5.4, 18, 0, 19, 111.5, 30.6, 39.2, 178.8, 77.3, 292.5, 28, 21,
  45.9, 31.5, 16.5, 54.9, 117.8, 270.2, 131.6, 45.5, 248.6, 55.5, 32.5, 16.3,
  42.9, 18, 19.4, 112.4, 77, 315.8, 71.9, 201.8, 37.3, 84.8, 25.4, 10.6,
  31.3, 12.1, 54.1, 112.4, 122.4, 44.4, 55.6, 160.3, 81, 257.1, 65.8, 3.8,
  11.9, 10.7, 16.5, 51.9, 81.4, 142, 321.5, 251.7, 144.4, 97.6, 3, 1.8,
  11.1, 16.6, 13.9, 41.7, 218, 55.7, 50.6, 159.8, 94, 57.9, 48.1, 121.8,
  8.6, 3.3, 64.2, 21.8, 169.8, 55.9, 26.4, 79, 77.5, 75.5, 67.1, 41.9,
  40.9, 132.4, 37.3, 93.7, 67.1, 128, 52.6, 17.2, 184.9, 97.6, 4.3, 15.2,
  21.1, 39.9, 1.5, 53.3, 89.4, 43, 97.7, 55.1, 232.3, 27.9, 118.2, 5.1,
  0, 4.3, 66.1, 9.2, 122.1, 191.4, 81.1, 80.4, 79.8, 112.9, 51.5, 13.9,
  14, 21.3, 42, 16.7, 261.1, 287, 26.1, 134.1, 106.3, 205.1, 29.5, 1.5,
  5.9, 14.5, 1, 219.1, 451.3, 107, 213.6, 48.2, 92.4, 105.2, 11.5, 6.9,
  3, 13.7, 44.5, 61.2, 99.3, 95.7, 193.4, 13.2, 217.1, 87.8, 11.2, 3,
  75.7, 5.3, 0, 31.1, 167.8, 198.2, 42.2, 121.6, 180.2, 121.9, 31.3, 22.8,
  31.9, 25.5, 69.9, 19.4, 109.6, 179.2, 73.2, 198.6, 425, 612.1, 26.8, 3,
  71.4, 34.9, 7.1, 8.8, 69.8, 227.7, 86.6, 88.7, 126, 195.4, 13.5, 36.6,
  1, 80.5, 23.4, 24.1, 31.4, 139.5, 68.6, 53.6, 40, 232.9, 77, 32.2,
  21.1, 23.1, 9.1, 15.3, 48.6, 140.2, 50.8, 55.8, 59.6, 46.2, 10.2, 18.3,
  105.9, 11.1, 0, 46.3, 307.7, 110.2, 294.2, 200.5, 74.3, 147.9, 30.9, 31,
  67.9, 15.8, 30.1, 56.1, 128, 25.9, 119.2, 41.1, 56.2, 235.4, 22.9, 10.8
)

# One row per (year, month) observation.
data <- data.frame(
  year = year,
  month = month,
  place1 = place1,
  place2 = place2
)

1 个答案:

答案 0 :(得分:1)

# Assign every row to a 5-year period and correlate place1 with place2 within
# each period.
#
# right = FALSE makes the bins left-closed: [1940,1945), [1945,1950),
# [1950,1955), [1955,1960). With cut()'s default right-closed bins the first
# interval would be (1940,1945], so every 1940 row would get NA and the
# periods would be shifted to 1941-1945, ..., 1956-1959.
data$year.group <- cut(data$year, seq(1940, 1960, by = 5), right = FALSE)

# split() yields one data frame per period (rows with an NA group are dropped)
# and names the result by period, so each correlation is labelled.
# use = "pairwise.complete.obs" ignores months where either place is NA.
lapply(split(data, data$year.group),
       function(d) cor(d$place1, d$place2, use = "pairwise.complete.obs"))

或者,如果要将其扩展到多个列,请尝试以下操作:

# Correlation matrix of several precipitation columns within each year group.
# "place3" is a placeholder for additional columns; list the columns that
# actually exist in `data`.
#
# split() is used instead of comparing against unique(data$year.group):
# rows whose group is NA are dropped rather than turning into NA row indices
# and an extra all-NA result, and the returned list is named by group.
# use = "pairwise.complete.obs" computes each pair of columns from the rows
# where both values are present.
lapply(split(data[, c("place1", "place2", "place3")], data$year.group),
       cor, use = "pairwise.complete.obs")

(并根据需要更改use选项)