### simulate data: y (binary), x1 (binary), x2 (continuous) ### x1 missing for some subjects, missing prob depends on x2 set.seed(13) n <- 1000 x2 <- rnorm(n) x1 <- rbinom(n, size=1,prob=1/(1+exp(-(1.5*x2)))) y <- rbinom(n, size=1, prob=1/(1+exp(-( (-2)+.3*x1+.1*x2 )))) msng <- as.logical(rbinom(n, size=1, prob=.1+.7*(x2>0))) print(sum(msng)) ### fit all data (can't do in real life) and complete case data fit.all <- glm(y~cbind(x1,x2), family="binomial") fit.cmp <- glm(y[!msng]~cbind(x1[!msng],x2[!msng]), family="binomial") ### now try to fit the `available' data using EM alg. ### initial values from complete case analysis beta.hat <- coef(fit.cmp) gamm.hat <- coef(glm(x1[!msng]~x2[!msng], family="binomial")) print(round(c(beta.hat,gamm.hat),3)) ### do 30 EM iterations for (mnlp in 1:30) { ### compute conditional prob Pr(X1=1 | Y=y,X2=x2), i.e., E-step trm.0 <- exp(y*(cbind(1,0,x2)%*%beta.hat)) * (1+exp(cbind(1,0,x2)%*%beta.hat))^(-1) * exp(0*(cbind(1,x2)%*%gamm.hat)) * (1+exp(cbind(1,x2)%*%gamm.hat))^(-1) trm.1 <- exp(y*(cbind(1,1,x2)%*%beta.hat)) * (1+exp(cbind(1,1,x2)%*%beta.hat))^(-1) * exp(1*(cbind(1,x2)%*%gamm.hat)) * (1+exp(cbind(1,x2)%*%gamm.hat))^(-1) condpr <- trm.1/(trm.0+trm.1) ### form augmented data in prep for M-step x.aug <- cbind(x1,x2)[!msng,] y.aug <- y[!msng] datweights <- rep(1, sum(!msng)) for (i in ((1:n)[msng])) { x.aug <- rbind(x.aug, c(0,x2[i]), c(1,x2[i])) y.aug <- c(y.aug, rep(y[i],2)) datweights <- c(datweights,1-condpr[i],condpr[i]) } ### actual M-step beta.hat <- coef(glm(y.aug~x.aug, family="binomial", weights=datweights)) gamm.hat <- coef(glm(x.aug[,1]~x.aug[,2], family="binomial", weights=datweights)) print(mnlp) print(round(c(beta.hat,gamm.hat),3)) }