r - Simulating data for multi-level logit model -


i attempting simulate multi-level data (random intercept) testing multi-level bayesian logistic regression model. have done far:

n.country <- 42 #number of units obs <- rbinom(n.country, 10, .8) #number of obs. per unit n.obs <- sum(obs) #overall number of obs country <- rep(1:n.country, obs)  e.country <- rnorm(n.country,0, 3) #country-specific error e.obs <- rnorm(n.obs, 0, 5) #observation-specific error  # continuous independent variables set.seed(666) x1 = rnorm(n.obs)             x2 = rnorm(n.obs) x3 = rnorm(n.obs)           x4 = rnorm(n.obs) x5 = rnorm(n.obs)           x6 = rnorm(n.obs) x7 = rnorm(n.obs)           x8 = rnorm(n.obs)  #dependent variable  l <- 2*x1 + 3*x2 + 1.75*x3 + 2.5*x4 + 1.5*x5 +         3.5*x6 + 0.5*x7 + 1.75*x8 + e.country[country] + e.obs  ##  prob. of dependent variable described linear  ##   combination of covariates , country-specific intercepts  pr = 1/(1+exp(-l))# pass through inv-logit function y = rbinom(n.obs,1,pr)  #dv described probability  #set data frame test = data.frame(y=y,x1=x1,x2=x2,x3=x3,x4=x4,x5=x5,x6=x6,                   x7=x7,x8=x8,country=country) 

however, when applying estimator it, yields different coefficients ones have set up, , glmer() function of lme4 package. both interested in random , fixed effects. bayesian estimator looks this:

writelines(" model{   (i in 1:n){ y[i] ~ dbin(p.bound[i], 1) p.bound[i] <- max(0, min(1, p[i])) logit(p[i]) <- xbeta[i]  #likelihood     xbeta[i] <- v[country[i]] + beta[1]*x1[i] + beta[2]*x2[i] + beta[3]*x3[i] + beta[4]*x4[i] + beta[5]*x5[i] + beta[6]*x6[i] + beta[7]*x7[i] + beta[8]*x8[i]   } #linear combination   (j in 1:n){     v[j] ~ dnorm(beta0, prec.tau2) #country-specific intercept   }   beta[1] ~ dmnorm(0,0.001) #iv priors   beta[2] ~ dmnorm(0,0.001)   beta[3] ~ dmnorm(0,0.001)   beta[4] ~ dmnorm(0,0.001)   beta[5] ~ dmnorm(0,0.001)   beta[6] ~ dmnorm(0,0.001)   beta[7] ~ dmnorm(0,0.001)   beta[8] ~ dmnorm(0,0.001)   beta0 ~ dnorm(0,0.001)    prec.tau2 ~dgamma(0.001,0.001) #hyperparameter priors   tau <- sqrt(1/prec.tau2) }", con="multi.logit.jag")  y <- vars$fix  x1 <-  vars$h_polcon3  x2 <- vars$imf_infl  x3 <- vars$imf_ue  x4 <- vars$imf_gd  x5 <- vars$wdi_trade  x6 <- vars$dr_eg  x7 <- vars$wdi_gdppcgr  x8 <- vars$dpi_erlc  n <- 607  country <- vars$cname #specify levels country <- as.factor(country) nlevels(country) #sample consists of 42 countries country <- as.numeric(country) #transfer factor variabel numeric variable in order make model run n <- 42  m.data.multi <-list('n'=n, 'n'= n, 'y'=y, 'country' = country, 'x1'=x1, 'x2'=x2, 'x3'=x3, 'x4'=x4, 'x5'=x5, 'x6'=x6, 'x7'=x7, 'x8'=x8)  library(coda) library(rjags) library(r2jags) m.sim.multi <- jags(data=m.data.multi, parameters.to.save="beta", model.file="multi.logit.jag", n.chains=3, n.iter=2000, n.burnin=500) print(m.sim.multi) 

and yields

inference bugs model @ "multi.logit.jag", fit using jags,  3 chains, each 2000 iterations (first 500 discarded)  n.sims = 4500 iterations saved          mu.vect sd.vect    2.5%     25%     50%     75%   97.5%  rhat n.eff beta[1]    1.049   2.202  -3.081  -0.484   1.097   2.520   5.844 1.782     6 beta[2]    0.004   0.011  -0.017  -0.001   0.005   0.011   0.026 1.437     8 beta[3]    0.165   0.065   0.034   0.118   0.171   0.213   0.286 1.039    85 beta[4]   -0.035   0.011  -0.061  -0.042  -0.033  -0.027  -0.016 1.191    15 beta[5]    0.060   0.015   0.030   0.048   0.060   0.070   0.086 1.411     9 beta[6]    0.071   0.027   0.012   0.048   0.075   0.094   0.110 1.187    16 beta[7]   -0.140   0.053  -0.243  -0.178  -0.138  -0.105  -0.039 1.072    33 beta[8]   -0.037   0.186  -0.392  -0.171  -0.036   0.092   0.348 1.125    21 deviance 332.333   9.250 316.568 325.541 331.423 337.914 352.942 1.014   150  each parameter, n.eff crude measure of effective sample size, , rhat potential scale reduction factor (at convergence, rhat=1).  dic info (using rule, pd = var(deviance)/2) pd = 42.2 , dic = 374.6 dic estimate of expected predictive error (lower deviance better). 

the glmer() model following:

library(matrix) library(lme4) library(ggplot2) library(lmertest)  multi.freq <- glmer(fix ~ h_polcon3 + imf_infl + imf_ue + imf_gd + wdi_trade + dr_eg + wdi_gdppcgr + dpi_erlc + (1 | cname), family = binomial(link="logit"), data=vars) multi.freq 

and yields

generalized linear mixed model fit maximum likelihood (laplace approximation) [ glmermod]  family: binomial  ( logit ) formula: fix ~ h_polcon3 + imf_infl + imf_ue + imf_gd + wdi_trade + dr_eg +       wdi_gdppcgr + dpi_erlc + (1 | cname)    data: vars control: glmercontrol(optctrl = list(maxfun = 20000))       aic      bic   loglik deviance df.resid     446.6    490.7   -213.3    426.6      597   scaled residuals:      min      1q  median      3q     max  -2.5707 -0.2023  0.0056  0.2251  6.7237   random effects:  groups name        variance std.dev.  cname  (intercept) 16.92    4.113    number of obs: 607, groups:  cname, 42  fixed effects:               estimate std. error z value pr(>|z|)     (intercept) -10.640232   2.362286  -4.504 6.66e-06 *** h_polcon3     1.025333   1.839488   0.557  0.57725     imf_infl      0.004985   0.010386   0.480  0.63129     imf_ue        0.183076   0.072181   2.536  0.01120 *   imf_gd       -0.031592   0.011738  -2.692  0.00711 **  wdi_trade     0.048175   0.016842   2.860  0.00423 **  dr_eg         0.092837   0.029145   3.185  0.00145 **  wdi_gdppcgr  -0.118344   0.049786  -2.377  0.01745 *   dpi_erlc     -0.100724   0.183591  -0.549  0.58326     --- signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1  correlation of fixed effects:             (intr) h_plc3 imf_nf imf_ue imf_gd wd_trd dr_eg  wd_gdp h_polcon3   -0.366                                                  imf_infl    -0.077  0.104                                           imf_ue      -0.346 -0.076  0.265                                    imf_gd       0.052 -0.017 -0.214 -0.507                             wdi_trade   -0.238  0.099 -0.193  0.193 -0.285                      dr_eg       -0.585 -0.068 -0.304  0.038  0.043 -0.282               wdi_gdppcgr  0.039 -0.109  0.157  0.040  0.123 -0.295  0.024        dpi_erlc    -0.163  0.151 -0.216 -0.005  0.017  0.156 -0.033 -0.122 

i therefore suspicious made mistake when setting data. can tell me did wrong?

update

i tried another, simpler approach (with removing e.obs), however, still different results both bayesian , package-estimator:

b1.true <- 2.5 #individual level predictor coefficient b2.true <- 1.75  mu.a.true <- 0.5 sigma.a.true <- 1.25 #among group variance  #simulating varying coefficients j <- 10 #50 groups u <- rnorm(j,0, 3) #group level predictor variable  a.true <- rep (na, j) (j in 1:j){   a.true[j] <- rnorm (1, mu.a.true, sigma.a.true) }  n <- 50 #number of observations #set.seed(666) country <- rep(1:j, n) x1 <- rnorm(n) #individual level predictor variable x2 <- rnorm(n)  y <- rep (na, n) #dependent variable  (i in 1:n){   y[i] <- rbinom(n, 1, 1/(1+exp(-(a.true[country[i]] + b1.true*x1[i] + b2.true*x2[i])))) }#prob. of dependent variable described linear combination of covariates , country-specific intercepts   #for (i in 1:n){   #y[i] <- rnorm(1, a.true[country[i]] + b.true*x[i], sigma.y.true) #}  test <- data.frame(country, x1, x2, y)  #apply bayesian estimator test dataset  writelines("            model{            (i in 1:n){            y[i] ~ dbin(p.bound[i], 1)            p.bound[i] <- max(0, min(1, p[i]))            logit(p[i]) <- xbeta[i]  #likelihood            xbeta[i] <- beta0 + beta[1]*x1[i] +beta[2]*x2[i] + v[country[i]]             } #linear combination            beta0 ~ dnorm(0,0.001)             beta[1] ~ dnorm(0,0.001) #iv priors            beta[2] ~ dnorm(0,0.001)            (j in 1:n){            v[j] ~ dnorm(mu.country, tau.country) #country-specific intercept            }            mu.country ~ dnorm(0,0.001)             tau.country <- pow(sigma.country, -2) #hyperparameter priors            sigma.country ~ dunif(0, 100)            }", con="multi.logit.jag")  y <- test$y  x1 <-  test$x1  x2 <- test$x2  n <- 500  country <- test$country #specify levels country <- as.factor(country) nlevels(country) #10 groups country <- as.numeric(country) #transfer factor variabel numeric variable in order make model run n <- 10  m.data.test <-list('n'=n, 'n'= n, 'y'=y, 'country' = country, 'x1'=x1, 'x2'=x2)  library(coda) library(rjags) library(r2jags) m.test <- jags(data=m.data.test, parameters.to.save="beta", model.file="multi.logit.jag", n.chains=3, n.iter=2000, n.burnin=500) print(m.test) 

the results still biased:

inference bugs model @ "multi.logit.jag", fit using jags,  3 chains, each 2000 iterations (first 500 discarded)  n.sims = 4500 iterations saved          mu.vect sd.vect    2.5%     25%     50%     75%   97.5%  rhat n.eff beta[1]    3.901   0.524   2.938   3.530   3.859   4.256   5.008 1.008   280 beta[2]    1.575   0.245   1.121   1.402   1.570   1.736   2.078 1.002  1500 deviance 273.741   4.814 266.316 270.207 273.118 276.595 284.787 1.002  2100  each parameter, n.eff crude measure of effective sample size, , rhat potential scale reduction factor (at convergence, rhat=1).  dic info (using rule, pd = var(deviance)/2) pd = 11.6 , dic = 285.3 dic estimate of expected predictive error (lower deviance better). 

same lme4:

#test package  library(matrix) library(lme4) library(ggplot2) library(lmertest)  multi.freq <- glmer(y ~ x1 + x2 + (1 | country), family = binomial(link="logit"), data=test, glmercontrol(optimizer="bobyqa", optctrl = list(maxfun = 100000))) summary(multi.freq) 

results:

generalized linear mixed model fit maximum likelihood (laplace approximation) [ glmermod]  family: binomial  ( logit ) formula: y ~ x1 + x2 + (1 | country)    data: test control: glmercontrol(optimizer = "bobyqa", optctrl = list(maxfun = 1e+05))       aic      bic   loglik deviance df.resid     311.7    328.6   -151.9    303.7      496   scaled residuals:       min       1q   median       3q      max  -2.00320 -0.18842 -0.01074  0.26742  2.08614   random effects:  groups  name        variance std.dev.  country (intercept) 6.721    2.593    number of obs: 500, groups:  country, 10  fixed effects:             estimate std. error z value pr(>|z|)     (intercept)   0.1587     0.8481   0.187    0.852     x1            3.7344     0.5011   7.452 9.22e-14 *** x2            1.5496     0.2422   6.397 1.58e-10 *** --- signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1  correlation of fixed effects:    (intr) x1    x1 0.049        x2 0.022  0.174 


Comments

Popular posts from this blog

python - How to insert QWidgets in the middle of a Layout? -

python - serve multiple gunicorn django instances under nginx ubuntu -

module - Prestashop displayPaymentReturn hook url -