r - Simulating data for multi-level logit model -
i attempting simulate multi-level data (random intercept) testing multi-level bayesian logistic regression model. have done far:
n.country <- 42 #number of units obs <- rbinom(n.country, 10, .8) #number of obs. per unit n.obs <- sum(obs) #overall number of obs country <- rep(1:n.country, obs) e.country <- rnorm(n.country,0, 3) #country-specific error e.obs <- rnorm(n.obs, 0, 5) #observation-specific error # continuous independent variables set.seed(666) x1 = rnorm(n.obs) x2 = rnorm(n.obs) x3 = rnorm(n.obs) x4 = rnorm(n.obs) x5 = rnorm(n.obs) x6 = rnorm(n.obs) x7 = rnorm(n.obs) x8 = rnorm(n.obs) #dependent variable l <- 2*x1 + 3*x2 + 1.75*x3 + 2.5*x4 + 1.5*x5 + 3.5*x6 + 0.5*x7 + 1.75*x8 + e.country[country] + e.obs ## prob. of dependent variable described linear ## combination of covariates , country-specific intercepts pr = 1/(1+exp(-l))# pass through inv-logit function y = rbinom(n.obs,1,pr) #dv described probability #set data frame test = data.frame(y=y,x1=x1,x2=x2,x3=x3,x4=x4,x5=x5,x6=x6, x7=x7,x8=x8,country=country)
however, when applying estimator it, yields different coefficients ones have set up, , glmer()
function of lme4
package. both interested in random , fixed effects. bayesian estimator looks this:
writelines(" model{ (i in 1:n){ y[i] ~ dbin(p.bound[i], 1) p.bound[i] <- max(0, min(1, p[i])) logit(p[i]) <- xbeta[i] #likelihood xbeta[i] <- v[country[i]] + beta[1]*x1[i] + beta[2]*x2[i] + beta[3]*x3[i] + beta[4]*x4[i] + beta[5]*x5[i] + beta[6]*x6[i] + beta[7]*x7[i] + beta[8]*x8[i] } #linear combination (j in 1:n){ v[j] ~ dnorm(beta0, prec.tau2) #country-specific intercept } beta[1] ~ dmnorm(0,0.001) #iv priors beta[2] ~ dmnorm(0,0.001) beta[3] ~ dmnorm(0,0.001) beta[4] ~ dmnorm(0,0.001) beta[5] ~ dmnorm(0,0.001) beta[6] ~ dmnorm(0,0.001) beta[7] ~ dmnorm(0,0.001) beta[8] ~ dmnorm(0,0.001) beta0 ~ dnorm(0,0.001) prec.tau2 ~dgamma(0.001,0.001) #hyperparameter priors tau <- sqrt(1/prec.tau2) }", con="multi.logit.jag") y <- vars$fix x1 <- vars$h_polcon3 x2 <- vars$imf_infl x3 <- vars$imf_ue x4 <- vars$imf_gd x5 <- vars$wdi_trade x6 <- vars$dr_eg x7 <- vars$wdi_gdppcgr x8 <- vars$dpi_erlc n <- 607 country <- vars$cname #specify levels country <- as.factor(country) nlevels(country) #sample consists of 42 countries country <- as.numeric(country) #transfer factor variabel numeric variable in order make model run n <- 42 m.data.multi <-list('n'=n, 'n'= n, 'y'=y, 'country' = country, 'x1'=x1, 'x2'=x2, 'x3'=x3, 'x4'=x4, 'x5'=x5, 'x6'=x6, 'x7'=x7, 'x8'=x8) library(coda) library(rjags) library(r2jags) m.sim.multi <- jags(data=m.data.multi, parameters.to.save="beta", model.file="multi.logit.jag", n.chains=3, n.iter=2000, n.burnin=500) print(m.sim.multi)
and yields
inference bugs model @ "multi.logit.jag", fit using jags, 3 chains, each 2000 iterations (first 500 discarded) n.sims = 4500 iterations saved mu.vect sd.vect 2.5% 25% 50% 75% 97.5% rhat n.eff beta[1] 1.049 2.202 -3.081 -0.484 1.097 2.520 5.844 1.782 6 beta[2] 0.004 0.011 -0.017 -0.001 0.005 0.011 0.026 1.437 8 beta[3] 0.165 0.065 0.034 0.118 0.171 0.213 0.286 1.039 85 beta[4] -0.035 0.011 -0.061 -0.042 -0.033 -0.027 -0.016 1.191 15 beta[5] 0.060 0.015 0.030 0.048 0.060 0.070 0.086 1.411 9 beta[6] 0.071 0.027 0.012 0.048 0.075 0.094 0.110 1.187 16 beta[7] -0.140 0.053 -0.243 -0.178 -0.138 -0.105 -0.039 1.072 33 beta[8] -0.037 0.186 -0.392 -0.171 -0.036 0.092 0.348 1.125 21 deviance 332.333 9.250 316.568 325.541 331.423 337.914 352.942 1.014 150 each parameter, n.eff crude measure of effective sample size, , rhat potential scale reduction factor (at convergence, rhat=1). dic info (using rule, pd = var(deviance)/2) pd = 42.2 , dic = 374.6 dic estimate of expected predictive error (lower deviance better).
the glmer() model following:
library(matrix) library(lme4) library(ggplot2) library(lmertest) multi.freq <- glmer(fix ~ h_polcon3 + imf_infl + imf_ue + imf_gd + wdi_trade + dr_eg + wdi_gdppcgr + dpi_erlc + (1 | cname), family = binomial(link="logit"), data=vars) multi.freq
and yields
generalized linear mixed model fit maximum likelihood (laplace approximation) [ glmermod] family: binomial ( logit ) formula: fix ~ h_polcon3 + imf_infl + imf_ue + imf_gd + wdi_trade + dr_eg + wdi_gdppcgr + dpi_erlc + (1 | cname) data: vars control: glmercontrol(optctrl = list(maxfun = 20000)) aic bic loglik deviance df.resid 446.6 490.7 -213.3 426.6 597 scaled residuals: min 1q median 3q max -2.5707 -0.2023 0.0056 0.2251 6.7237 random effects: groups name variance std.dev. cname (intercept) 16.92 4.113 number of obs: 607, groups: cname, 42 fixed effects: estimate std. error z value pr(>|z|) (intercept) -10.640232 2.362286 -4.504 6.66e-06 *** h_polcon3 1.025333 1.839488 0.557 0.57725 imf_infl 0.004985 0.010386 0.480 0.63129 imf_ue 0.183076 0.072181 2.536 0.01120 * imf_gd -0.031592 0.011738 -2.692 0.00711 ** wdi_trade 0.048175 0.016842 2.860 0.00423 ** dr_eg 0.092837 0.029145 3.185 0.00145 ** wdi_gdppcgr -0.118344 0.049786 -2.377 0.01745 * dpi_erlc -0.100724 0.183591 -0.549 0.58326 --- signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 correlation of fixed effects: (intr) h_plc3 imf_nf imf_ue imf_gd wd_trd dr_eg wd_gdp h_polcon3 -0.366 imf_infl -0.077 0.104 imf_ue -0.346 -0.076 0.265 imf_gd 0.052 -0.017 -0.214 -0.507 wdi_trade -0.238 0.099 -0.193 0.193 -0.285 dr_eg -0.585 -0.068 -0.304 0.038 0.043 -0.282 wdi_gdppcgr 0.039 -0.109 0.157 0.040 0.123 -0.295 0.024 dpi_erlc -0.163 0.151 -0.216 -0.005 0.017 0.156 -0.033 -0.122
i therefore suspicious made mistake when setting data. can tell me did wrong?
update
i tried another, simpler approach (with removing e.obs), however, still different results both bayesian , package-estimator:
b1.true <- 2.5 #individual level predictor coefficient b2.true <- 1.75 mu.a.true <- 0.5 sigma.a.true <- 1.25 #among group variance #simulating varying coefficients j <- 10 #50 groups u <- rnorm(j,0, 3) #group level predictor variable a.true <- rep (na, j) (j in 1:j){ a.true[j] <- rnorm (1, mu.a.true, sigma.a.true) } n <- 50 #number of observations #set.seed(666) country <- rep(1:j, n) x1 <- rnorm(n) #individual level predictor variable x2 <- rnorm(n) y <- rep (na, n) #dependent variable (i in 1:n){ y[i] <- rbinom(n, 1, 1/(1+exp(-(a.true[country[i]] + b1.true*x1[i] + b2.true*x2[i])))) }#prob. of dependent variable described linear combination of covariates , country-specific intercepts #for (i in 1:n){ #y[i] <- rnorm(1, a.true[country[i]] + b.true*x[i], sigma.y.true) #} test <- data.frame(country, x1, x2, y) #apply bayesian estimator test dataset writelines(" model{ (i in 1:n){ y[i] ~ dbin(p.bound[i], 1) p.bound[i] <- max(0, min(1, p[i])) logit(p[i]) <- xbeta[i] #likelihood xbeta[i] <- beta0 + beta[1]*x1[i] +beta[2]*x2[i] + v[country[i]] } #linear combination beta0 ~ dnorm(0,0.001) beta[1] ~ dnorm(0,0.001) #iv priors beta[2] ~ dnorm(0,0.001) (j in 1:n){ v[j] ~ dnorm(mu.country, tau.country) #country-specific intercept } mu.country ~ dnorm(0,0.001) tau.country <- pow(sigma.country, -2) #hyperparameter priors sigma.country ~ dunif(0, 100) }", con="multi.logit.jag") y <- test$y x1 <- test$x1 x2 <- test$x2 n <- 500 country <- test$country #specify levels country <- as.factor(country) nlevels(country) #10 groups country <- as.numeric(country) #transfer factor variabel numeric variable in order make model run n <- 10 m.data.test <-list('n'=n, 'n'= n, 'y'=y, 'country' = country, 'x1'=x1, 'x2'=x2) library(coda) library(rjags) library(r2jags) m.test <- jags(data=m.data.test, parameters.to.save="beta", model.file="multi.logit.jag", n.chains=3, n.iter=2000, n.burnin=500) print(m.test)
the results still biased:
inference bugs model @ "multi.logit.jag", fit using jags, 3 chains, each 2000 iterations (first 500 discarded) n.sims = 4500 iterations saved mu.vect sd.vect 2.5% 25% 50% 75% 97.5% rhat n.eff beta[1] 3.901 0.524 2.938 3.530 3.859 4.256 5.008 1.008 280 beta[2] 1.575 0.245 1.121 1.402 1.570 1.736 2.078 1.002 1500 deviance 273.741 4.814 266.316 270.207 273.118 276.595 284.787 1.002 2100 each parameter, n.eff crude measure of effective sample size, , rhat potential scale reduction factor (at convergence, rhat=1). dic info (using rule, pd = var(deviance)/2) pd = 11.6 , dic = 285.3 dic estimate of expected predictive error (lower deviance better).
same lme4:
#test package library(matrix) library(lme4) library(ggplot2) library(lmertest) multi.freq <- glmer(y ~ x1 + x2 + (1 | country), family = binomial(link="logit"), data=test, glmercontrol(optimizer="bobyqa", optctrl = list(maxfun = 100000))) summary(multi.freq)
results:
generalized linear mixed model fit maximum likelihood (laplace approximation) [ glmermod] family: binomial ( logit ) formula: y ~ x1 + x2 + (1 | country) data: test control: glmercontrol(optimizer = "bobyqa", optctrl = list(maxfun = 1e+05)) aic bic loglik deviance df.resid 311.7 328.6 -151.9 303.7 496 scaled residuals: min 1q median 3q max -2.00320 -0.18842 -0.01074 0.26742 2.08614 random effects: groups name variance std.dev. country (intercept) 6.721 2.593 number of obs: 500, groups: country, 10 fixed effects: estimate std. error z value pr(>|z|) (intercept) 0.1587 0.8481 0.187 0.852 x1 3.7344 0.5011 7.452 9.22e-14 *** x2 1.5496 0.2422 6.397 1.58e-10 *** --- signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 correlation of fixed effects: (intr) x1 x1 0.049 x2 0.022 0.174
Comments
Post a Comment