在具有beta分布的glmmTMB中考虑组之间的异方差

时间:2018-10-04 14:40:23

标签: r glm mixed-models

我正在为模型拟合一个连续比例的响应变量。它是AUC(曲线下的面积),范围是0到1。在我的预测变量中,各组之间存在一定的异方差性(分散性),请参见下面的残差图。我正在使用带有beta发行版的glmmTMB。我将模型类型作为随机因素包括在内(模型类型是运行物种分布模型的方式)。我正在使用R 3.3.2。模型中有什么方法可以解释这种异方差性吗?

数据:

dater<-structure(list(AUC_indep = c(0.6313, 0.6313, 0.5868, 0.6368, 0.6313,0.6563, 0.6053, 0.7342, 0.672, 0.686, 0.5947, 0.6084, 0.7057, 0.7243, 0.6783, 0.7021, 0.7548, 0.7508, 0.7573, 0.7427, 0.7788, 0.7941, 0.8052, 0.7817, 0.9409, 0.949, 0.9457, 0.9547, 0.9807, 0.9642, 0.98, 0.957, 0.8129, 0.8374, 0.8127, 0.8287, 0.8426, 0.8537, 0.8399, 0.8376, 0.9591, 0.9675, 0.956, 0.9672,0.9395, 0.9604, 0.9349, 0.9627, 0.7602, 0.7859, 0.7281, 0.775, 0.7289, 0.7787, 0.6937, 0.7312, 0.86, 0.8229, 0.8411, 0.8608, 0.8157, 0.8076, 0.8686,0.8692, 0.8576, 0.8617, 0.8208, 0.8028, 0.8623, 0.8873, 0.8347, 0.8224, 0.4529, 0.4638, 0.4529, 0.4457, 0.5217, 0.5399, 0.3877, 0.4384, 0.8571, 0.8745, 0.881, 0.8506, 0.8874, 0.9004, 0.8636, 0.9091, 0.8995, 0.9182, 0.8715, 0.8762, 0.8927, 0.8815, 0.8817, 0.8584, 0.8652, 0.8979, 0.8432, 0.8479, 0.8162, 0.855, 0.8257, 0.8563, 0.6419, 0.6361, 0.6529, 0.6209, 0.6401, 0.614, 0.6194, 0.6118, 0.9097, 0.9225, 0.9304, 0.9492, 0.9236, 0.9343, 0.9418, 0.9338, 0.8057, 0.8258, 0.7955, 0.8485, 0.8332, 0.8535, 0.8153, 0.8188, 0.4663, 0.4553, 0.4453, 0.4305, 0.4824, 0.4458, 0.467, 0.4417, 0.9628, 0.9625, 0.9646, 0.9612, 0.9699, 0.9664, 0.9712, 0.9693, 0.6563, 0.6643, 0.637, 0.607, 0.7008, 0.7017, 0.6563, 0.5769, 0.8449, 0.8687, 0.8061, 0.8303, 0.8684, 0.8839, 0.8225, 0.8736, 0.8284, 0.7983, 0.7143, 0.8066, 0.8578, 0.8326, 0.7065, 0.7274, 0.8017, 0.7909, 0.8369, 0.8091, 0.8441, 0.8502, 0.8345, 0.8438, 0.8187, 0.7846, 0.7044, 0.7658, 0.848, 0.8047, 0.7697, 0.8095, 0.4598, 0.4742, 0.3984, 0.3566, 0.6318, 0.5977, 0.4402, 0.441), dispersal = c("wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "wind/none", "wind/none", "wind/none","wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal","wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "none", "none", "none", "none", "none", "none", "none", "none", "winged", "winged", "winged", "winged", "winged", "winged", "winged", "winged", "none", "none", "none", "none", "none", "none", "none", "none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "none", "none", "none", "none", "none", "none", "none", "none", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "none", "none", "none", "none", "none", "none", "none", "none", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "animal", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none", "wind/none"), model = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L), .Label = c("A", "B", "C", "D"), class = "factor"), reg = c("A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B",  "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B")), .Names = c("AUC_indep", "dispersal", "model", "reg"), class = "data.frame", row.names = c(NA, -192L))

模型是:

library(glmmTMB)

m1<-glmmTMB(AUC_indep~reg+dispersal+(1|model), family=list(family="beta", link="logit"), data=dater); 

summary(m1);

Family: beta  ( logit )
Formula:          AUC_indep ~ dispersal + (1 | model)
Data: dater

 AIC      BIC   logLik deviance df.resid 
  -243.2   -223.6    127.6   -255.2      186 

Random effects:

Conditional model:
 Groups Name        Variance  Std.Dev. 
model  (Intercept) 1.045e-10 1.022e-05
Number of obs: 192, groups:  model, 4

Overdispersion parameter for beta family (): 7.97 

Conditional model:
               Estimate Std. Error z value Pr(>|z|)    
(Intercept)         1.17251    0.08496  13.800  < 2e-16 ***
dispersalnone      -0.29647    0.13204  -2.245  0.02475 *  
dispersalwind/none  0.40301    0.13554   2.973  0.00295 ** 
dispersalwinged     0.29193    0.28895   1.010  0.31235    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


ggplot(dater, aes(x=factor(dispersal), y=residuals(m1)))+geom_point()

residual plot

1 个答案:

答案 0 :(得分:0)

我相信您只需指定色散公式即可校正(即考虑)GLMTMTB中的异方差性。

参考:https://journal.r-project.org/archive/2017/RJ-2017-066/RJ-2017-066.pdf

从R-paper参考资料中:

“色散模型可用于解释异方差。例如,如果响应随着年份的发展而变化更大(相对于平均值),则具有负二项式分布的模型可能使用单边公式dispformula =〜DOY,其中DOY是一年中的一天。”

R包参考:https://cran.r-project.org/web/packages/glmmTMB/glmmTMB.pdf 在文档中搜索格式。

从软件包文档中获取

glmmTMB(formula, data = NULL, family = gaussian(), ziformula = ~0,
dispformula = ~1, weights = NULL, offset = NULL,
contrasts = NULL, na.action = na.fail, se = TRUE,
verbose = FALSE, doFit = TRUE, control = glmmTMBControl(),
REML = FALSE)

“ dispformula一个仅包含固定效果的色散的单边公式:默认〜1指定给定任何族的标准色散。对于不具有色散参数的族,该参数将被忽略。有关每个族的色散参数的说明,请参阅(sigma)。色散模型使用alog链接。在高斯混合模型中,dispformula =〜0将参数固定为be0,从而迫使方差成为随机效应。”