### simulate data: y (binary), x1 (binary), x2 (continuous)
### x1 missing for some subjects, missing prob depends on x2

set.seed(13)
n <- 1000
x2   <- rnorm(n)
x1   <- rbinom(n, size=1,prob=1/(1+exp(-(1.5*x2))))
y    <- rbinom(n, size=1, prob=1/(1+exp(-( (-2)+.3*x1+.1*x2 ))))
msng <- as.logical(rbinom(n, size=1, prob=.1+.7*(x2>0)))
print(sum(msng))

### fit all data (can't do in real life) and complete case data

fit.all <- glm(y~cbind(x1,x2), family="binomial")
fit.cmp <- glm(y[!msng]~cbind(x1[!msng],x2[!msng]), family="binomial")

### now try to fit the `available' data using EM alg.

### initial values from complete case analysis
beta.hat <- coef(fit.cmp)
gamm.hat <- coef(glm(x1[!msng]~x2[!msng], family="binomial"))
print(round(c(beta.hat,gamm.hat),3))

### do 30 EM iterations
for (mnlp in 1:30) {

  ### compute conditional prob Pr(X1=1 | Y=y,X2=x2), i.e., E-step

  trm.0 <- exp(y*(cbind(1,0,x2)%*%beta.hat)) *
           (1+exp(cbind(1,0,x2)%*%beta.hat))^(-1) *
           exp(0*(cbind(1,x2)%*%gamm.hat)) *
           (1+exp(cbind(1,x2)%*%gamm.hat))^(-1)

  trm.1 <- exp(y*(cbind(1,1,x2)%*%beta.hat)) *
           (1+exp(cbind(1,1,x2)%*%beta.hat))^(-1) *
           exp(1*(cbind(1,x2)%*%gamm.hat)) *
           (1+exp(cbind(1,x2)%*%gamm.hat))^(-1)

  condpr <- trm.1/(trm.0+trm.1)

  ### form augmented data in prep for M-step 

  x.aug <- cbind(x1,x2)[!msng,]
  y.aug <- y[!msng]
  datweights <- rep(1, sum(!msng))

  for (i in ((1:n)[msng])) {
    x.aug <- rbind(x.aug, c(0,x2[i]), c(1,x2[i]))
    y.aug <- c(y.aug, rep(y[i],2))
    datweights <- c(datweights,1-condpr[i],condpr[i]) }

  ### actual M-step
  beta.hat <- coef(glm(y.aug~x.aug, family="binomial", weights=datweights))
  gamm.hat <- coef(glm(x.aug[,1]~x.aug[,2], family="binomial", 
                   weights=datweights))

  print(mnlp)
  print(round(c(beta.hat,gamm.hat),3))

}