Je vais essayer bambi (version 0.1.0) pour un simple modèle de régression de Poisson. Cependant, les résultats diffèrent par rapport aux implémentations droites de pymc3 ou statsmodels, et je n'arrive pas à comprendre comment interpréter les coefficients que me donne bambi. Le code de test est ci-dessous. Ai-je spécifié le mauvais modèle, ou devrais-je ne pas compter sur les prieurs automatiques de bambi?Des résultats incorrects avec la régression de Poisson en utilisant bambi?
import numpy as np
import scipy.stats
import pandas
import patsy
import statsmodels
import pymc3
import bambi
%matplotlib inline
# generate data
num_subjects = 4
mu = [5, 8, 10, 11]
num_samples = [43, 60, 56, 38]
counts = [scipy.stats.poisson.rvs(m,size=n,random_state=m) for m,n in zip(mu,num_samples)]
counts = np.concatenate(counts)
subject = np.repeat(np.arange(num_subjects), num_samples)
df = pandas.DataFrame(np.vstack([subject,counts]).T, columns=['subject','counts'])
# sample means
print(df.groupby('subject').mean())
# subject 0 = 5.0
# subject 1 = 7.4
# subject 2 = 9.5
# subject 3 = 10.0
# fit with bambi
model_bambi = bambi.Model(df)
result_bambi = model_bambi.fit('counts ~ C(subject)', categorical=['subject'], family='poisson', chains=2)
print(result_bambi.summary(hpd=None, diagnostics=None))
# resulting posterior means:
# Intercept 9.3310 -> ?
# C(subject)[T.1] 3.8171 -> ?
# C(subject)[T.2] 4.4419 -> ?
# C(subject)[T.3] 3.8652 -> ?
# fit directly with pymc3
with pymc3.Model() as model_pymc3:
pymc3.glm.GLM.from_formula("counts ~ C(subject)", df, family=pymc3.glm.families.Poisson())
trace = pymc3.sample(2000, njobs=2, tune=500)
pymc3.plot_posterior(trace, varnames=[x for x in trace.varnames if x[:2]!='mu']);
# resulting posterior means:
# Intercept 1.6065 -> mu = 5.0 = exp(1.6065)
# C(subject)[T.1] 0.3990 -> mu = 7.4 = exp(1.6065+0.3990)
# C(subject)[T.2] 0.6477 -> mu = 9.5 = exp(1.6065+0.6477)
# C(subject)[T.3] 0.6977 -> mu = 10.0 = exp(1.6065+0.6977)
# fit with statsmodels
my, mx = patsy.dmatrices("counts ~ C(subject)", df, NA_action='raise')
model_sm = statsmodels.api.GLM(my, mx, family=statsmodels.api.families.Poisson())
result_sm = model_sm.fit()
print(result_sm.summary())
# resulting posterior means:
# Intercept 1.6094 -> mu = 5.0 = exp(1.6094)
# C(subject)[T.1] 0.3965 -> mu = 7.4 = exp(1.6094+0.3965)
# C(subject)[T.2] 0.6456 -> mu = 9.5 = exp(1.6094+0.6456)
# C(subject)[T.3] 0.6958 -> mu = 10.0 = exp(1.6094+0.6958)