创建一个处理我的数据帧计算的函数

时间:2014-09-18 02:35:10

标签: r dataframe analysis

我正在为我创建的数据帧进行系统计算。我有计算代码,但我想:

1)将它作为一个函数使用并为我创建的数据帧调用它。

2)当数据框中出现下一个ID时,重新开始(重置)计算。

感谢您对此的帮助和建议。

使用以下代码在R中创建数据框:

#Create a dataframe
# Dose times; each one gets an extra "dose" row in addition to the
# regular observation row at the same time.
dosetimes <- c(0, 6, 12, 18)

# One row per time point 0..30 (plus any dose time not already on the grid)
df <- data.frame("ID" = 1, "TIME" = sort(unique(c(seq(0, 30, 1), dosetimes))),
                 "AMT" = 0, "A1" = NA, "WT" = NA)
doserows <- subset(df, TIME %in% dosetimes)

# Every duplicated dose row carries an amount of 100
doserows$AMT <- rep(100, nrow(doserows))

#Add back dose information
df <- rbind(df, doserows)
# Within a time point the dose row (larger AMT) comes first
df <- df[order(df$TIME, -df$AMT), ]
# At TIME 0 only the dose row is kept
df <- subset(df, !(TIME == 0 & AMT == 0))

# Starting value of A1 is the amount given at TIME 0
df$A1[df$TIME == 0] <- df$AMT[df$TIME == 0]

#Time-dependent covariate
df$WT <- 70
df$WT[df$TIME >= 12] <- 120

#values needed for the calculation
C <- 2
V <- 10
k <- C / V

# Fill A1 row by row:
#   A1[i] = AMT[i] + A1[i - 1] * exp(-(TIME[i] - TIME[i - 1]) * k)
#
# df: data frame with columns TIME, AMT and A1; A1 of the first row must
#     already hold the starting value.
# k:  rate constant used in the exponential term.
# Returns df with A1 filled in for rows 2..nrow(df). A data frame with fewer
# than two rows is returned unchanged.
calc_a1 <- function(df, k) {
  # seq_len(n)[-1] is empty for n < 2, unlike 2:n which would count backwards
  for (i in seq_len(nrow(df))[-1]) {
    dt <- df$TIME[i] - df$TIME[i - 1]
    df$A1[i] <- df$AMT[i] + df$A1[i - 1] * exp(-dt * k)
  }
  df
}

df <- calc_a1(df, k)

head(df)

plot(A1 ~ TIME, data = df, type = "b", col = "blue", ylim = c(0, 150))

另一个问题是,前面的代码假定所有时间点的受试者ID都为1。假设当WT(体重)变为120时,受试者ID变为2,那么如何在新的受试者处重置计算,并让这一过程对数据框中的所有受试者ID自动进行?在这种情况下,原始数据框将如下所示:

#code:
# Build the two-subject example data set.
# NOTE: the workspace is deliberately NOT cleared here; rm(list = ls())
# would silently delete unrelated objects of whoever runs this script.
dosetimes <- c(0, 6, 12, 18)
df <- data.frame("ID" = 1, "TIME" = sort(unique(c(seq(0, 30, 1), dosetimes))),
                 "AMT" = 0, "A1" = NA, "WT" = NA)

# Duplicate the rows at the dose times and give each an amount of 100
doserows <- subset(df, TIME %in% dosetimes)
doserows$AMT <- rep(100, nrow(doserows))
df <- rbind(df, doserows)

# Within a time point the dose row (larger AMT) comes first;
# at TIME 0 only the dose row is kept
df <- df[order(df$TIME, -df$AMT), ]
df <- subset(df, !(TIME == 0 & AMT == 0))
df$A1[df$TIME == 0] <- df$AMT[df$TIME == 0]

# Weight switches from 70 to 120 at TIME 12; those rows become subject 2
df$WT <- 70
df$WT[df$TIME >= 12] <- 120
df$ID[df$WT >= 120] <- 2

# Restart subject 2's clock at 0 by shifting its times. Numbering the rows
# 0..20 instead would give the duplicated dose/observation rows different
# times and move the second dose from TIME 6 to TIME 7.
df$TIME[df$ID == 2] <- df$TIME[df$ID == 2] - min(df$TIME[df$ID == 2])

提前谢谢!

2 个答案:

答案 0 :(得分:0)

你想要这个:

# Dose times are defined here so that this snippet runs on its own;
# the TIME grid below is built from them.
dosetimes <- c(0, 6, 12, 18)

# Template data frame: one row per time point 0..30, no doses yet
ddf <- data.frame("ID" = 1, "TIME" = sort(unique(c(seq(0, 30, 1), dosetimes))),
                  "AMT" = 0, "A1" = NA, "WT" = NA)

# Add dose records to a data frame, fill in the A1 column and plot A1 vs TIME.
#
# Arguments:
#   df        data frame with columns TIME, AMT, A1 and WT.
#   dosetimes times at which a dose row is added (default 0, 6, 12, 18).
#   dose      amount written to AMT on every dose row (default 100).
#   C, V      constants; the rate constant used below is k = C / V.
#
# Returns (invisibly) the augmented data frame with A1 filled in, so the
# result of the calculation is available to the caller. The first rows are
# printed and a plot is drawn as side effects.
myfn <- function(df, dosetimes = c(0, 6, 12, 18), dose = 100, C = 2, V = 10) {
    stopifnot(is.data.frame(df))

    # Duplicate the rows that fall on a dose time and mark them as doses
    doserows <- subset(df, TIME %in% dosetimes)
    doserows$AMT <- rep(dose, nrow(doserows))

    #Add back dose information
    df <- rbind(df, doserows)
    # Within a time point the dose row (larger AMT) comes first
    df <- df[order(df$TIME, -df$AMT), ]
    # At TIME 0 only the dose row is kept
    df <- subset(df, !(TIME == 0 & AMT == 0))

    # Starting value of A1 is the amount given at TIME 0
    df$A1[df$TIME == 0] <- df$AMT[df$TIME == 0]

    #Time-dependent covariate
    df$WT <- 70
    df$WT[df$TIME >= 12] <- 120

    #values needed for the calculation
    k <- C / V

    # A1[i] = AMT[i] + A1[i - 1] * exp(-(TIME[i] - TIME[i - 1]) * k)
    # seq_len(n)[-1] is empty for n < 2, unlike 2:n which counts backwards
    for (i in seq_len(nrow(df))[-1]) {
        dt <- df$TIME[i] - df$TIME[i - 1]
        df$A1[i] <- df$AMT[i] + df$A1[i - 1] * exp(-dt * k)
    }

    # Inside a function a bare head(df) is not auto-printed, so print it
    print(head(df))

    plot(A1 ~ TIME, data = df, type = "b", col = "blue", ylim = c(0, 150))

    invisible(df)
}

# Run the calculation and plot for the data frame built above
myfn(ddf)

对于多次调用(每个ID调用一次):

# Call myfn once per subject. Iterating over the IDs actually present in
# ddf avoids relying on an undefined N and on IDs being exactly 1..N.
for (i in unique(ddf$ID)) {
    myfn(ddf[ddf$ID == i, ])
    # Pause between subjects so each plot can be inspected
    readline(prompt="Press <Enter> to continue...")
}

答案 1 :(得分:0)

通常,在对不同受试者的数据进行计算时,我喜欢先按ID拆分数据框,再在for循环中依次处理每个受试者的数据并完成所有计算,把新计算的结果存入一个列表,最后将该列表合并为一个包含所需数值的数据框并返回。这样可以对每个受试者的处理过程进行充分的控制。

# One data frame per subject ID
subjects <- split(df, f = df$ID)
# Empty list with one slot per subject, to be filled with the results
forResults <- vector(mode = "list", length = length(subjects))

# Constants for the calculation; k is derived from C and V
C <- 2
V <- 10
k <- C / V

# Fill in A1 for every subject, plot each subject, and return all subjects
# combined into one data frame.
#
# Arguments:
#   data         list of data frames, one per subject (e.g. from split()),
#                each with columns TIME and AMT.
#   resultsArray list that receives the per-subject results.
#   rate         rate constant in the exponential term. Defaults to the
#                variable `k` visible from where this function is defined;
#                if no such variable exists, rate must be supplied.
#   a1_init      value of A1 on the first row of every subject (default 100).
#
# Returns the per-subject data frames row-bound into a single data frame.
myFunc <- function(data, resultsArray, rate = k, a1_init = 100){
  # The index must not be called k: that would hide the rate constant and
  # make the decay depend on the subject's position in the list.
  for (idx in seq_along(data)) {
    subj <- data[[idx]]

    # Nothing to compute or plot for a subject without rows
    if (nrow(subj) == 0) next

    # Start value for the first row; rows 2..n are overwritten below
    subj$A1 <- rep(a1_init, nrow(subj))

    # A1[i] = AMT[i] + A1[i - 1] * exp(-(TIME[i] - TIME[i - 1]) * rate)
    # seq_len(n)[-1] is empty for n < 2, unlike 2:n which counts backwards
    for (i in seq_len(nrow(subj))[-1]) {
      dt <- subj$TIME[i] - subj$TIME[i - 1]
      subj$A1[i] <- subj$AMT[i] + subj$A1[i - 1] * exp(-dt * rate)
    }

    # Inside a function/loop a bare head() is not auto-printed
    print(head(subj))

    # you can add all sorts of other calculations you want to do on each subject's data

    # when you're done doing calculations, put the resultant into
    # the resultsArray and we'll rebuild the dataframe with all the new variables
    resultsArray[[idx]] <- subj

    # if you're not using RStudio, then you want to use dev.new() to instantiate a new plot canvas
    # dev.new()   # dont need this if you're using RStudio (which doesnt allow multiple plots open)
    plot(A1 ~ TIME, data = subj, type = "b", col = "blue", ylim = c(0, 150))
  }

  # collapse the results list into a dataframe
  resultsDF <- do.call(rbind, resultsArray)
  return(resultsDF)
}

# Run the per-subject calculation; results holds the data frame myFunc returns
results = myFunc(subjects, forResults)