Question

我在使用特定曲线以适应R时遇到一些困难，而在商业曲线拟合程序中它完全正常。

数据应适合的公式为：

y(t) = A * exp(-a*(t)) + B * exp(-b*(t)) - (A+B) * exp(-c*(t))

所以为此，我想使用内置于R中的非线性回归。我现在已经在这一天开启和关闭，并且无法使其正常运行。问题完全在于初始值，因此我使用NLS2来强制查找初始值。

y <- c(0,0.01377,0.01400875,0.0119175,0.00759375,0.00512125,0.004175,0.00355375,
0.00308875,0.0028925,0.00266375)
t <- c(0,3,6,12,24,48,72,96,120,144,168)
df <- data.frame(t,y)
plot(t,y);
#Our model:
fo <- y ~ f1*exp(-k1*t)+f2*exp(-k2*t)-(f1+f2)*exp(-k3*t);

#Define the outer boundaries to search for initial values
grd <- data.frame(f1=c(0,1),
              f2=c(0,1),
              k1=c(0,2),
              k2=c(0,2),
              k3=c(0,0.7));

#Do the brute-force
fit <- nls2(fo,
        data=df,
        start = grd,
        algorithm = "brute-force",
        control=list(maxiter=20000))
fit
coef(fit)
final <- nls(fo, data=df, start=as.list(coef(fit)))

它应该给出的值是：

f1  0.013866
f2  0.005364
k1  0.063641
k2  0.004297
k3  0.615125

虽然迭代值非常高，但我只是得到无意义的回报。我显然做错了什么，但我看不到它

根据@Roland的评论进行编辑：

使用线性方法近似k1-3建议的方法似乎适用于某些数据集，但不适用于所有数据集。以下是我现在根据您的输入使用的代码。

#Oral example:
y <- c(0,0.0045375,0.0066325,0.00511375,0.00395875,0.003265,0.00276,
0.002495,0.00231875);
t <- c(0,12,24,48,72,96,120,144,168);
#IV example:
#y <- c(0,0.01377,0.01400875,0.0119175,0.00759375,0.00512125,0.004175,
#0.00355375,0.00308875,0.0028925,0.00266375)
#t <- c(0,3,6,12,24,48,72,96,120,144,168)
DF <- data.frame(y, t)
fit1 <- nls(y ~ cbind(exp(-k1*t), exp(-k2*t), exp(-k3*t)), algorithm = "plinear", data = DF,
            start = list(k1 = 0.002, k2 = 0.02, k3= 0.2))
k1_predict <-summary(fit1)$coefficients[1,1]
k2_predict <-summary(fit1)$coefficients[2,1]
k3_predict <-summary(fit1)$coefficients[3,1]
fo <- y ~ f1*exp(-k1*t)+f2*exp(-k2*t)-(f1+f2)*exp(-k3*t);
fit2 <- nls(fo, data = DF, 
            start = list(k1 = k1_predict, k2 = k2_predict, k3 = k3_predict, f1 = 0.01, f2 = 0.01))
summary(fit2);
plot(t,y);
curve(predict(fit2, newdata = data.frame(t = x)), 0, 200, add = TRUE, col = "red")

oral_example fit

@G。格罗腾迪克与Roland的建议类似，您的建议也非常出色，因为它能够拟合某些数据集，但却与其他数据集斗争。下面的代码基于您的输入，并以奇异的梯度矩阵退出。

#Oral example:
y <- c(0,0.0045375,0.0066325,0.00511375,0.00395875,0.003265,0.00276,
0.002495,0.00231875);
t <- c(0,12,24,48,72,96,120,144,168);
#IV example:
#y <- c(0,0.01377,0.01400875,0.0119175,0.00759375,0.00512125,0.004175,
#0.00355375,0.00308875,0.0028925,0.00266375)
#t <- c(0,3,6,12,24,48,72,96,120,144,168)
df <- data.frame(y, t)
grd <- data.frame(f1=c(0,1),
              f2=c(0,1),
              k1=c(0,2),
              k2=c(0,2),
              k3=c(0,0.7));
set.seed(123)
fit <- nls2(fo,
        data=df,
        start = grd,
        algorithm = "random",
        control = nls.control(maxiter = 100000))
nls(fo, df, start = coef(fit), alg = "port", lower = 0)
plot(t,y);
curve(predict(nls, newdata = data.frame(t = x)), 0, 200, add = TRUE, col = "red")

Answer 1

我首先进行部分线性拟合，对线性参数没有约束，以获得指数参数的良好起始值以及关于线性参数大小的一些想法：

DF <- data.frame(y, t)
fit1 <- nls(y ~ cbind(exp(-k1*t), exp(-k2*t), exp(-k3*t)), algorithm = "plinear", data = DF,
            start = list(k1 = 0.002, k2 = 0.02, k3= 0.2))
summary(fit1)
#Formula: y ~ cbind(exp(-k1 * t), exp(-k2 * t), exp(-k3 * t))
#
#Parameters:
#        Estimate Std. Error t value Pr(>|t|)    
#k1     0.0043458  0.0010397   4.180 0.008657 ** 
#k2     0.0639379  0.0087141   7.337 0.000738 ***
#k3     0.6077646  0.0632586   9.608 0.000207 ***
#.lin1  0.0053968  0.0006637   8.132 0.000457 ***
#.lin2  0.0139231  0.0008694  16.014 1.73e-05 ***
#.lin3 -0.0193145  0.0010631 -18.168 9.29e-06 ***

然后你可以适合你的实际模型：

fit2 <- nls(fo, data = DF, 
            start = list(k1 = 0.06, k2 = 0.004, k3 = 0.6, f1 = 0.01, f2 = 0.01))
summary(fit2)  
#Formula: y ~ f1 * exp(-k1 * t) + f2 * exp(-k2 * t) - (f1 + f2) * exp(-k3 * t)
#
#Parameters:
#    Estimate Std. Error t value Pr(>|t|)    
#k1 0.0639344  0.0079538   8.038 0.000198 ***
#k2 0.0043456  0.0009492   4.578 0.003778 ** 
#k3 0.6078929  0.0575616  10.561 4.24e-05 ***
#f1 0.0139226  0.0007934  17.548 2.20e-06 ***
#f2 0.0053967  0.0006059   8.907 0.000112 ***         

curve(predict(fit2, newdata = data.frame(t = x)), 0, 200, add = TRUE, col = "red")

请注意，通过切换指数项（即kn起始值的顺序）可以轻松地重新参数化此模型，这可能会导致对f1和{{1但是，基本上相同。

Answer 2

有了这么多参数，我会使用algorithm = "random"而不是"brute"。如果我们这样做，那么下面给出的结果接近于问题中的结果（由于模型参数的对称性，直到参数的排列）：

set.seed(123)
fit <- nls2(fo,
        data=df,
        start = grd,
        algorithm = "random",
        control = nls.control(maxiter = 20000))
nls(fo, df, start = coef(fit), alg = "port", lower = 0)

，并提供：

Nonlinear regression model
  model: y ~ f1 * exp(-k1 * t) + f2 * exp(-k2 * t) - (f1 + f2) * exp(-k3 * t)
   data: df
      f1       f2       k1       k2       k3 
0.005397 0.013923 0.004346 0.063934 0.607893 
 residual sum-of-squares: 2.862e-07

Algorithm "port", convergence message: relative convergence (4)

已添加

上述的一种变体是在minpack.lm包中使用nlsLM而不是nls，并使用样条线来获取数据集中的更多点。代替nls行尝试以下操作。它仍然提供了融合：

library(minpack.lm)
t_s <- with(df, min(t):max(t))
df_s <- setNames(data.frame(spline(df$t, df$y, xout = t_s)), c("t", "y"))
nlsLM(fo, df_s, start = coef(fit), lower = rep(0,5), control = nls.control(maxiter = 1024))

并且在口头例子中也是如此：

set.seed(123)
y <- c(0,0.0045375,0.0066325,0.00511375,0.00395875,0.003265,0.00276,
0.002495,0.00231875);
t <- c(0,12,24,48,72,96,120,144,168)
DF <- data.frame(y, t)
grd <- data.frame(f1=c(0,1), f2=c(0,1), k1=c(0,2), k2=c(0,2), k3=c(0,0.7))
fit <- nls2(fo,
        data=DF,
        start = grd,
        algorithm = "random",
        control = nls.control(maxiter = 20000))

library(minpack.lm)
t_s <- with(DF, min(t):max(t))
df_s <- setNames(data.frame(spline(DF$t, DF$y, xout = t_s)), c("t", "y"))
nlsLM(fo, df_s, start = coef(fit), lower = rep(0,5), control = nls.control(maxiter = 1024))

R曲线拟合（多指数）与NLS2和NLS

2 个答案: