如何连接(join)具有公共列的两个数据框?

时间:2015-08-20 08:34:12

标签: r dataframe dplyr

我想根据另一列的行组合添加新列。

例如,假设我有一个如下数据框:

library(dplyr)
library(minpack.lm)
library(broom)
# Build a 60-row toy data set: six groups (a-f) of ten observations each.
No <- rep(letters[1:6], each = 10)

# ACME switches between 78 and 110 every ten rows; ARGON steps through
# 256, 320, 384 every twenty rows. Both are stored as text labels.
ACME <- as.character(rep(c(78, 110), each = 10, times = 3))
ARGON <- as.character(rep(c(256, 320, 384), each = 20))

# Ten sweep values: five rising from 2 to 40, then five falling from -2 to -40.
V <- c(seq(2, 40, length.out = 5), seq(-2, -40, length.out = 5))

# Two separately drawn, ascending runs of ten uniform values in [0.001, 1].
DQ0 <- c(sort(runif(10, 0.001, 1)), sort(runif(10, 0.001, 1)))

# Direction label flips between "North" and "South" every five rows.
direc <- rep(c("North", "South"), each = 5, times = 6)

# V (length 10) and DQ0 (length 20) are recycled by data.frame() to 60 rows.
df <- data.frame(No, ACME, ARGON, V, DQ0, direc)


>df
    No ACME ARGON     V        DQ0 direc
1    a   78   256   2.0 0.07532351 North
2    a   78   256  11.5 0.13785481 North
3    a   78   256  21.0 0.27397961 North
4    a   78   256  30.5 0.44296243 North
5    a   78   256  40.0 0.45721902 North
6    a   78   256  -2.0 0.68077463 North
7    a   78   256 -11.5 0.68764283 North
8    a   78   256 -21.0 0.76284209 North
9    a   78   256 -30.5 0.81040056 North
10   a   78   256 -40.0 0.95336230 North
11   b  110   256   2.0 0.04190305 South
12   b  110   256  11.5 0.17484353 South
13   b  110   256  21.0 0.22409319 South
----------------

我使用 minpack.lm 包中的 nlsLM 函数对 df 进行拟合。

-> 拟合部分

# Fit DQ0 with a two-branch model using minpack.lm::nlsLM.
# The first factor only applies to "North" rows with V < J1 and the second only
# to "South" rows with V > J2; every other row contributes a factor of 1.
# Fitted parameters: del1, J1, del2, J2 (initial guesses given in `start`).
# NOTE(review): t_pw and f0 are neither columns of df nor listed in `start`,
# so they must already exist in the calling environment -- confirm.
# `direc` is resolved through `data = df` (instead of `df$direc`) so the formula
# never bypasses the data argument, and `TRUE` replaces the reassignable `T`.
nls_fit <- nlsLM(
  DQ0 ~ ifelse(direc == "North" & V < J1,
               exp((-t_pw) / f0 * exp(-del1 * (1 - V / J1)^2)), 1) *
    ifelse(direc == "South" & V > J2,
           exp((-t_pw) / f0 * exp(-del2 * (1 - V / J2)^2)), 1),
  data = df,
  start = c(del1 = 1, J1 = 15, del2 = 1, J2 = -15),
  trace = TRUE
)

拟合后,我想创建一个新的数据框 df_new,其中包含一个名为 adress 的新列:

  # Attach the fitted coefficients to every group and label each group with
  # its ACME.ARGON combination.
  # `adress` is computed on df *before* the fit results replace its columns:
  # after do() only the grouping columns and the model.* columns remain, so a
  # later mutate(adress = interaction(ACME, ARGON)) resolves ACME/ARGON to the
  # full-length global vectors and fails with a size mismatch. Using adress as
  # an additional grouping key carries it through do().
  df_new <- df %>%
    mutate(adress = interaction(ACME, ARGON, drop = TRUE)) %>%
    group_by(No, adress) %>%
    # tidy(nls_fit) is the same for every group; wrapping it in data.frame()
    # yields columns prefixed with "model." (model.term, model.estimate, ...).
    do(data.frame(model = tidy(nls_fit))) %>%
    ungroup() %>%
    # select() replaces the deprecated select_() and renames on the fly.
    select(No, delta = model.term, value = model.estimate, adress) %>%
    # Keep only the two fitted delta parameters.
    filter(delta %in% c("del1", "del2"))

我收到的错误是

Error: incompatible size (%d), expecting %d (the group size) or 1(错误:大小不一致,期望为组大小或 1)

去掉 mutate 部分后,我得到的输出如下:
  

df_new

    No delta    value
1   a  del1 1.479056
2   a  del2 1.016404
3   b  del1 1.479056
4   b  del2 1.016404
5   c  del1 1.479056
6   c  del2 1.016404
7   d  del1 1.479056
8   d  del2 1.016404
9   e  del1 1.479056
10  e  del2 1.016404
11  f  del1 1.479056
12  f  del2 1.016404

我希望得到如下结果:

    No delta  value    adress
1   a  del1 1.479056   78.256
2   a  del2 1.016404   78.256
3   b  del1 1.479056  110.256
4   b  del2 1.016404  110.256
5   c  del1 1.479056   78.320
6   c  del2 1.016404   78.320
7   d  del1 1.479056  110.320
8   d  del2 1.016404  110.320
9   e  del1 1.479056   78.384
10  e  del2 1.141958   78.384
11  f  del1 1.019201  110.384
12  f  del2 1.141958  110.384

1 个答案:

答案 0 :(得分:3)

您真正需要的是 df_new 与 df 之间的连接(join)。例如,可以使用 data.table 来实现:
library(data.table) #v1.9.5+
# Convert df_new to a data.table in place, then update-join against df on "No":
# each df_new row receives, in a new column adr, the adress value taken from
# the df rows that share its No.
# NOTE(review): assumes df already carries an `adress` column -- confirm.
setDT(df_new)
df_new[df, on = "No", adr := adress]

如果您想使用CRAN的最新版本,您可以这样做:

# Variant of the update-join that does not use the `on=` argument: both tables
# are converted to data.table in place and keyed on "No", and the join then
# matches rows through those keys, writing the result into a new column adr.
# NOTE(review): assumes df already carries an `adress` column -- confirm.
setDT(df_new, key="No")[setDT(df, key="No"), adr:=adress]

都给出以下结果:

> dt_new
    No delta    value     adr
 1:  a  del1 1.479056  78.256
 2:  a  del2 1.016404  78.256
 3:  b  del1 1.479056 110.256
 4:  b  del2 1.016404 110.256
 5:  c  del1 1.479056  78.320
 6:  c  del2 1.016404  78.320
 7:  d  del1 1.479056 110.320
 8:  d  del2 1.016404 110.320
 9:  e  del1 1.479056  78.384
10:  e  del2 1.016404  78.384
11:  f  del1 1.479056 110.384
12:  f  del2 1.016404 110.384

使用dplyr的方法:

# Build a one-row-per-No lookup of the adress value and left-join it onto
# df_new, so every df_new row gains an adr column while keeping its row order.
# NOTE(review): assumes df carries an `adress` column holding a single distinct
# value per No -- confirm.
df_new2 <- left_join(
  df_new,
  df %>%
    group_by(No) %>%
    summarise(adr = unique(adress)),
  by = "No"
)

给出相同的结果:

> identical(df_new2, setDF(df_new))
[1] TRUE

注意:我使用的是 data.table 的开发版本。