##### eLNN elnn <- function(data, pattern, dump=F, dump.z=F, file="mle_elnn.txt", file.z="mle_elnn_z.txt", header=null, header2=null) { # log-likelihood for null case (no differential expression) lp0 <- function(param, R, G) { alpha <- param[1] beta <- param[2] mu <- param[3] k <- param[4] n <- ncol(R) + ncol(G) temp1 <- apply(log(R), 1, sum) + apply(log(G), 1, sum) temp2 <- apply(log(R)^2, 1, sum) + apply(log(G)^2, 1, sum) return( alpha*log(beta) + lgamma(n/2+alpha) - temp1 - n/2 * log(2*pi) - lgamma(alpha) - 1/2 * log(k*n+1) - (n/2+alpha) * log( beta + 1/2/k*( -(k*temp1 + mu)^2 / (k*n+1) + k * temp2 + mu^2 ) ) ) } # log-likelihood for alternative case (differential expression) lpa <- function(param, R, G) { alpha <- param[1] beta <- param[2] mu <- param[3] k <- param[4] nR <- ncol(R) nG <- ncol(G) n <- nR + nG temp1 <- apply(log(R), 1, sum) temp2 <- apply(log(G), 1, sum) temp3 <- apply(log(R)^2, 1, sum) temp4 <- apply(log(G)^2, 1, sum) return( 2*alpha*log(beta) + lgamma(nR/2+alpha) + lgamma(nG/2+alpha) - temp1 - temp2 - n/2 * log(2*pi) - 2*lgamma(alpha) - 1/2*log(k*nR+1) - 1/2*log(k*nG+1) - (nR/2+alpha) * log( beta + 1/2/k*( -(k*temp1 + mu)^2 / (k*nR+1) + k * temp3 + mu^2 )) - (nG/2+alpha) * log( beta + 1/2/k*( -(k*temp2 + mu)^2 / (k*nG+1) + k * temp4 + mu^2 ))) } # null loglikelihood nlogl <- function(param, R, G) { value <- sum(lp0(param,R,G)) if (is.na(value)) value <- 1e50 return(-value) } # posterior probability of change z <- function(param, p, R, G) { p / (p + (1-p)*exp(lp0(param,R,G) - lpa(param,R,G))) } # complete data loglikelihood clogl <- function(param, R, G, z, p) { alpha <- param[1] beta <- param[2] mu <- param[3] k <- param[4] value <- sum(z*lpa(param,R,G) + (1-z)*lp0(param,R,G)) # + z*log(p) + (1-z)*log(1-p) + (b1-1)*log(p) + (b2-1)*log(1-p) if (is.na(value)) value <- 1e50 return(-value) } # gradient of clogl wrt alpha, beta, mu and k glogl <- function(param, R, G, z, p) { alpha <- param[1] beta <- param[2] mu <- param[3] k <- param[4] n1 <- ncol(R) n0 <- ncol(R)*2 nR <- ncol(R) nG <- ncol(G) n <- nR + nG temp1 <- apply(log(R), 1, sum) temp2 <- apply(log(G), 1, sum) temp3 <- apply(log(R)^2, 1, sum) temp4 <- apply(log(G)^2, 1, sum) value1 <- sum( z * ( 2*log(beta) + digamma(nR/2+alpha) + digamma(nG/2+alpha) - 2*digamma(alpha) - log( beta + 1/2/k*( -(k*temp1 + mu)^2 / (k*nR+1) + k * temp3 + mu^2 )) - log( beta + 1/2/k*( -(k*temp2 + mu)^2 / (k*nG+1) + k * temp4 + mu^2 )) ) + (1-z) * ( log(beta) + digamma(n/2+alpha) - digamma(alpha) - log( beta + 1/2/k*( -(k*(temp1+temp2) + mu)^2 / (k*n+1) + k * (temp3+temp4) + mu^2 )) )) value2 <- sum( z * ( 2*alpha/beta - (nR/2+alpha) / ( beta + 1/2/k*( -(k*temp1 + mu)^2 / (k*nR+1) + k * temp3 + mu^2 )) - (nG/2+alpha) / ( beta + 1/2/k*( -(k*temp2 + mu)^2 / (k*nG+1) + k * temp4 + mu^2 )) ) + (1-z) * ( alpha/beta - (n/2+alpha) / ( beta + 1/2/k*( -(k*(temp1+temp2) + mu)^2 / (k*n+1) + k * (temp3+temp4) + mu^2 )) )) value3 <- sum( z * ( -(nR/2+alpha) / ( beta + 1/2/k*( -(k*temp1 + mu)^2 / (k*nR+1) + k * temp3 + mu^2 )) * 1/2/k * ( -2*(k*temp1 + mu) / (k*nR+1) + 2*mu ) - (nG/2+alpha) / ( beta + 1/2/k*( -(k*temp2 + mu)^2 / (k*nG+1) + k * temp4 + mu^2 )) * 1/2/k * ( -2*(k*temp2 + mu) / (k*nG+1) + 2*mu ) ) + (1-z) * ( -(n/2+alpha) / ( beta + 1/2/k*( -(k*(temp1+temp2) + mu)^2 / (k*n+1) + k * (temp3+temp4) + mu^2 )) * 1/2/k * ( -2*(k*(temp1+temp2) + mu) / (k*n+1) + 2*mu ) ) ) value4 <- sum( z * ( -nR/2/(k*nR+1) -nG/2/(k*nG+1) - (nR/2+alpha) / ( beta + 1/2/k*( -(k*temp1 + mu)^2 / (k*nR+1) + k * temp3 + mu^2 )) * ( -1/2/k^2 * ( -(k*temp1 + mu)^2 / (k*nR+1) + k * temp3 + mu^2 ) + 1/2/k*( (-2*(k*temp1 + mu)*temp1 * (k*nR+1) + (k*temp1 + mu)^2 * nR) / (k*nR+1)^2 + temp3 ) ) - (nG/2+alpha) / ( beta + 1/2/k*( -(k*temp2 + mu)^2 / (k*nG+1) + k * temp4 + mu^2 )) * ( -1/2/k^2 * ( -(k*temp2 + mu)^2 / (k*nG+1) + k * temp4 + mu^2 ) + 1/2/k*( (-2*(k*temp2 + mu)*temp2 * (k*nG+1) + (k*temp2 + mu)^2 * nG) / (k*nG+1)^2 + temp4 ) ) ) + (1-z) * ( -n/2/(k*n+1) - (n/2+alpha) / ( beta + 1/2/k*( -(k*(temp1+temp2) + mu)^2 / (k*n+1) + k * (temp3+temp4) + mu^2 )) * ( -1/2/k^2 * ( -(k*(temp1+temp2) + mu)^2 / (k*n+1) + k * (temp3+temp4) + mu^2 ) + 1/2/k*( (-2*(k*(temp1+temp2) + mu)*(temp1+temp2) * (k*n+1) + (k*(temp1+temp2) + mu)^2 * n) / (k*n+1)^2 + (temp3+temp4) ) ) ) ) return(c(-value1, -value2, -value3, -value4)) } n <- nrow(data) pattern <- strsplit(pattern, " +")[[1]] con1 <- pattern=="1" con2 <- pattern=="2" r <- data[,con1] g <- data[,con2] # crude MM estimation of hyperparameters c.var <- sort(apply(log(data), 1, var))[(n%/%4):(n%/%4*3)] c.mean <- median(c.var) c.alpha <- c.mean^2 / var(c.var) + 2 c.beta <- c.mean * (c.alpha-1) init <- c(c.alpha, c.beta, mean(log(data), trim=0.025, na.rm=T), 30) if (dump) { write(header, file, ncolumns=1, append=T) write(c("crude", init, 0.5), file, ncolumns=6, append=T) # store the crude estimates of (alpha, beta, mu, k, p) } # hyperparameters of the beta prior distribution for p b1 <- 2 b2 <- 2 # to obtain initial estimates of alpha, beta, mu, k, Z, p using the null loglikelihood mleinfo <- optim(par=init, fn=nlogl, R=r, G=g, method="L-BFGS-B", lower=c(1e-4,1e-10,0.01,1), upper=c(100,1,100,10000)) if (mleinfo$value == -1e50) { old <- init repeat{ mleinfo <- optim(par=old, fn=nlogl, R=r, G=g, method="L-BFGS-B", lower=c(1e-4,1e-10,0.01,1), upper=c(100,1,100,10000), control=list(maxit=0)) if (mleinfo$value == -1e50) mleinfo$par <- old if (sum(abs( (mleinfo$par-old)/old )<10^-3) == 4) break else old <- mleinfo$par } } init2 <- mleinfo$par Z <- z(init2, p=0.5, r, g) p <- (sum(Z)+b1-1) / (n+b1+b2-2) init2p <- p if (dump) write(c("null", init2, init2p), file, ncolumns=6, append=T) # store the initial estimates of (alpha, beta, mu, k, p) # EM algorithm to obtain mles of alpha, beta, mu, k, p kk <- 0 repeat{ old <- mleinfo$par oldp <- p kk <- kk+1 Z <- z(old, oldp, r, g) p <- (sum(Z)+b1-1) / (n+b1+b2-2) # MLE of p mleinfo <- optim(par=old, fn=clogl, gr=glogl, R=r, G=g, z=Z, p=oldp, method="L-BFGS-B", lower=c(1e-4,1e-10,0.01,1), upper=c(100,1,100,10000)) if (mleinfo$value == -1e50) mleinfo$par <- old if (dump) write(c(kk, mleinfo$value, mleinfo$par, p), file, ncolumns=7, append=T) if (sum( c(abs((mleinfo$par-old)/old)<10^(-3), abs((p-oldp)/oldp)<10^(-3)) )==5) break } if (dump) write("", file, append=T) mle <- mleinfo$par mle_p <- p Z <- z(mle, mle_p, r, g) if (dump.z) { write(header2, file.z, ncolumns=1, append=T) write(Z, file.z, ncolumns=1, append=T) write("", file.z, append=T) } list(alpha=mle[1], beta=mle[2], mu=mle[3], k=mle[4], p=mle_p, z=Z) }