Python:用于回归模型的Gibbs采样器

时间:2017-05-17 01:43:37

标签: python python-3.x bayesian mcmc

我正在尝试在贝叶斯框架下编写一个 Gibbs 采样器函数。我的代码改编自这个[网站][1]上的示例,该示例针对的是简单线性回归模型。但我要处理的是一个更复杂的模型:$y = \beta_0 + \beta_1 x + x^{\gamma}\,\sigma\,\varepsilon$,其中 $\sigma$ 是模型的方差参数。这意味着在 Gibbs 采样中,我需要依次从各个满条件分布中采样,例如 $p(\beta_0 \mid y, x, \beta_1, \sigma, \gamma)$ 等。我的问题是:由于多了需要估计的参数(例如 $\gamma$),我应该如何修改代码,才能对 $\beta_0$、$\beta_1$ 以及其他参数进行采样?我的代码如下:

import numpy as np
import pymc as pm
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
# Default size (in inches) for every figure created below.
plt.rcParams['figure.figsize'] = (10, 5)
# The following is a shell command, not Python; leaving it as a statement makes
# the whole file a SyntaxError. Run it in a terminal before executing the script:
#   conda install -c conda-forge pymc3=3.0


def sample_beta_0(y, x, beta_1, sigma, gamma, mu_0,  tau_0):
    """Draw the intercept from its Gaussian conditional distribution.

    Observation ``i`` is given the noise scale ``x[i]**gamma * sigma``, i.e.
    the precision ``1 / (x[i]**gamma * sigma)**2``. These per-observation
    precisions weight the partial residuals ``y - beta_1 * x`` and are added
    to the prior precision ``tau_0``.

    Args:
        y: Responses (any 1-D sequence of numbers).
        x: Covariate values, same length as ``y``.
        beta_1: Current value of the slope.
        sigma: Current noise scale multiplier.
        gamma: Exponent applied to ``x`` in the noise scale.
        mu_0: Prior mean of the intercept.
        tau_0: Prior precision of the intercept.

    Returns:
        A single draw of the intercept.

    Raises:
        ValueError: If ``x`` and ``y`` have different lengths.
    """
    # Convert up front so that lists are accepted and integer arrays can be
    # raised to a negative or fractional ``gamma``.
    y = np.asarray(y, dtype=float)
    x = np.asarray(x, dtype=float)
    if len(x) != len(y):
        raise ValueError("x and y must have the same length")

    # NOTE: an exact zero in ``x`` with gamma > 0 gives a zero noise scale and
    # therefore an infinite precision for that observation.
    obs_precision = 1.0 / ((x ** gamma) * sigma) ** 2

    precision = tau_0 + np.sum(obs_precision)
    mean = (tau_0 * mu_0 + np.sum((y - beta_1 * x) * obs_precision)) / precision
    return np.random.normal(mean, 1 / np.sqrt(precision))

def sample_beta_1(y, x, beta_0, sigma, mu_1, sigma_1):
    """Draw the slope from its Gaussian conditional distribution.

    ``sigma`` multiplies the data sums and ``sigma_1`` is added to the
    resulting precision, so both are used as precisions here.

    Args:
        y: Responses (array-like supporting element-wise arithmetic).
        x: Covariate values, same length as ``y``.
        beta_0: Current value of the intercept.
        sigma: Weight applied to the data terms (used as a noise precision).
        mu_1: Prior mean of the slope.
        sigma_1: Prior weight of the slope (used as a prior precision).

    Returns:
        A single draw of the slope.
    """
    n_obs = len(y)
    assert len(x) == n_obs

    centred = y - beta_0  # responses with the intercept removed
    post_precision = sigma_1 + sigma * np.sum(x * x)
    weighted_sum = sigma_1 * mu_1 + sigma * np.sum(centred * x)

    post_mean = weighted_sum / post_precision
    post_sd = 1 / np.sqrt(post_precision)
    return np.random.normal(post_mean, post_sd)


def sample_sigma(y, x, beta_0, beta_1, alpha, beta):
    """Draw ``sigma`` from a Gamma distribution given the regression line.

    The draw uses shape ``alpha + N / 2`` and rate
    ``beta + sum(residual**2) / 2``, where the residuals are
    ``y - beta_0 - beta_1 * x``.

    Args:
        y: Responses (array-like supporting element-wise arithmetic).
        x: Covariate values.
        beta_0: Current value of the intercept.
        beta_1: Current value of the slope.
        alpha: Prior shape parameter.
        beta: Prior rate parameter.

    Returns:
        A single Gamma draw.
    """
    n_obs = len(y)
    residuals = y - beta_0 - beta_1 * x

    shape = alpha + n_obs / 2
    rate = beta + np.sum(np.square(residuals)) / 2

    # numpy parameterises the Gamma by scale, the reciprocal of the rate.
    return np.random.gamma(shape, 1 / rate)

# Ground-truth parameters used to simulate the data set.
beta_0_true, beta_1_true, sigma_true = -1, 2, 1

# Simulate N points on a straight line; sigma_true enters as a precision,
# so the noise standard deviation is 1 / sqrt(sigma_true).
N = 50
x = np.random.uniform(0, 4, N)
y = np.random.normal(
    loc=beta_0_true + beta_1_true * x,
    scale=1 / np.sqrt(sigma_true),
)

# Scatter plot of the simulated data.
synth_plot = plt.plot(x, y, "o")
plt.gca().set_xlabel("x")
plt.gca().set_ylabel("y")

# Uncomment to inspect the raw values:
# print('Y are', y)
# print('X are', x)
plt.show()


"""GIBBS Sampler"""
# Starting state of the chain.
init = dict(beta_0=0, beta_1=0, sigma=2)

# Hyperparameters: (mu_0, sigma_0) and (mu_1, sigma_1) belong to the priors on
# the intercept and slope; (alpha, beta) are the shape and rate handed to
# sample_sigma.
hypers = dict(
    mu_0=0,
    sigma_0=1,
    mu_1=0,
    sigma_1=1,
    alpha=2,
    beta=1,
)


def gibbs(y, x, iters, init, hypers):
    """Run the Gibbs sampler and return the trace of sampled parameters.

    Each iteration updates ``beta_0``, ``beta_1`` and ``sigma`` in turn, each
    conditional on the most recent values of the others.

    Args:
        y: Responses.
        x: Covariate values, same length as ``y``.
        iters: Number of Gibbs iterations to run.
        init: Dict with starting values under ``"beta_0"``, ``"beta_1"`` and
            ``"sigma"``. An optional ``"gamma"`` entry sets the exponent
            passed to ``sample_beta_0``; it defaults to 0.0.
        hypers: Dict with ``"mu_0"``, ``"sigma_0"``, ``"mu_1"``,
            ``"sigma_1"``, ``"alpha"`` and ``"beta"``.

    Returns:
        A ``pandas.DataFrame`` with one row per iteration and the columns
        ``beta_0``, ``beta_1`` and ``sigma``. The trace is also printed.

    Raises:
        ValueError: If ``x`` and ``y`` have different lengths.
    """
    if len(y) != len(x):
        raise ValueError("x and y must have the same length")

    beta_0 = init["beta_0"]
    beta_1 = init["beta_1"]
    sigma = init["sigma"]
    # sample_beta_0 requires a gamma argument; calling it without one raised a
    # TypeError on the first iteration. gamma is not sampled here: it is held
    # fixed, and the default of 0 makes x**gamma equal to 1 for every point.
    gamma = init.get("gamma", 0.0)

    # NOTE(review): sample_beta_0 uses sigma as a noise *scale*
    # (1 / (x**gamma * sigma)**2), whereas sample_beta_1 and sample_sigma use
    # it as a *precision*. Confirm the intended parameterisation, and add a
    # sampling step for gamma, before relying on the results.

    # One row per iteration: beta_0, beta_1, sigma.
    trace = np.zeros((iters, 3))

    for it in range(iters):
        beta_0 = sample_beta_0(y, x, beta_1, sigma, gamma,
                               hypers["mu_0"], hypers["sigma_0"])
        beta_1 = sample_beta_1(y, x, beta_0, sigma, hypers["mu_1"], hypers["sigma_1"])
        sigma = sample_sigma(y, x, beta_0, beta_1, hypers["alpha"], hypers["beta"])
        trace[it, :] = np.array((beta_0, beta_1, sigma))

    trace = pd.DataFrame(trace)
    trace.columns = ['beta_0', 'beta_1', 'sigma']
    print(trace)
    return trace

# Run the sampler on the synthetic data.
iters = 1000
trace = gibbs(y, x, iters, init, hypers)

# Trace plot: one line per parameter against the iteration index.
traceplot = trace.plot()
traceplot.set_xlabel("Iteration")
traceplot.set_ylabel("Parameter value")

# Discard the first half of the chain as burn-in. The slice is open-ended so
# the final draw is kept (the previous trace[500:999] dropped row 999), and it
# follows `iters` instead of hard-coded indices.
burn_in = iters // 2
trace_burnt = trace[burn_in:]
hist_plot = trace_burnt.hist(bins=30, layout=(1, 3))
print(trace_burnt.median())
print(trace_burnt.std())

我知道这个问题很长,但还是希望能得到大家的帮助!

0 个答案:

没有答案