# rgam public example

#source('rgam-public.R')

library(rgam)
library(gam)
# 
# EXAMPLE 1
# 

# create data
set.seed(33)
n <- 80
k <- 4
n1 <- 25
x <- 1:n
la <- exp( sin(2*x/120) + cos(7*x/60) + 1)
y <- rpois(n, lambda=la)
# add outliers
x2 <- x[(n1-k):n1] + .5
la2 <- exp( sin(2*x2/120) + cos(7*x2/60) + 1)
y2 <- rpois(k+1, lambda= la2) + 20 
x <- c(x, x2) 
y <- c(y, y2)
la <- c(la, la2)

# plot data
plot(x,y)


# rgam fit - use robust cross validation to select the span
# a <- rgam(x=x, y=y, family='poisson', span='rcv', alphas=c(.3, .35, .4, .45, .5, .55, .6))
a <- rgam(x=x, y=y, family='poisson', cv.method='rcv', alpha=c(.3, .35, .4, .45, .5, .55, .6))


# gam fit - use a span that produces a similar fit to that of rgam where no outliers are
# present
b <- gam(y~lo(x, span=.3), family=poisson)

# show the fits and the true mean function
plot(x,y, main='Comparison of gam and robust gam fits - Poisson')
lines(sort(x), la[order(x)], lwd=3, col='gray')
lines(sort(x), predict(a, type='response')[order(x)], lwd=3, col='pink')
lines(sort(x), predict(b, type='response')[order(x)], col='lightblue', lwd=3)
legend(65, 20, legend=c('true', 'gam', 'rgam'), lwd=3, col=c('gray', 'lightblue', 'pink'))


# 
# EXAMPLE 2
# 

# create data - Binomial response

x <- as.vector(1:100)
n <- length(x)
nu <- cos(5*x/120)/.8
pri <- exp(nu) / (1 + exp(nu))
ni <- rep(10,n)
set.seed(31)
y <- rbinom(n, size=ni, prob=pri)
# add outliers
e <- rbinom(n=11, size=1, prob=.25)
y[75:85] <- (1-e)*y[75:85] + e*ni[75:85]
# plot data
plot(x,y)

# rgam fit - use robust cross validation to select the span
# a <- rgam(x=x, y=y, ni=ni, family='binomial', span='rcv', alphas=c(.3, .35, .4, .45, .5))
a <- rgam(x=x, y=y, ni=ni, family='binomial', cv.method='rcv', alpha=c(.3, .35, .4, .45, .5))



# gam fit - use a span that produces a similar fit to that of rgam where no outliers are
# present
b <- gam(cbind(y, ni-y)~lo(x, span=.3), family=binomial)

# show the fits and the true mean function
plot(x,y, main='Comparison of gam and robust gam fits - Binomial')
lines(sort(x), (ni*predict(a,type='response'))[order(x)], lwd=3, col='pink')
lines(sort(x), (ni*pri)[order(x)], lwd=3, col='gray')
lines(sort(x), (ni*predict(b, type='response'))[order(x)], col='lightblue', lwd=3)
legend(80, 9, legend=c('true', 'gam', 'rgam'), lwd=3, col=c('gray', 'lightblue', 'pink'))

 

# 
# EXAMPLE 3
# 

# create data - two covariates - Poisson response
set.seed(31)
n <- 20
x1 <- sort(runif(20))
x2 <- sort(runif(20))
x <- expand.grid(x1, x2)
#x <- x + matrix(rnorm(n*n*2, sd=.01), n*n, 2)
hh <- function(a) return( exp(3*sin(a[1]*pi*5/4) + 3*cos(a[2]*pi/2)) ) 
mu <- apply(x, 1, hh) 
# outliers
b <- rbinom(n*n, size=1, prob=.15)
y <- rpois(n*n, lambda=mu)*(1-b) + b*rpois(n*n, lambda=mu+500)

# show the data and the true mean surface
pp <- persp(x1, x2, matrix(mu, n, n), theta=150, phi=10, zlim=c(0, max(y)+5),
col='lightblue', shade=.75, main='True mean function', sub='gray points are below the surface')#
vv <- (y - mu) > 0
points(trans3d(x[,1], x[,2], y, pmat=pp), pch=19, col=c('lightgray','red')[as.numeric(vv)+1])


# rgam fit - cross-validation is not currently implemented for more 
# than one covariate
a <- rgam(x=x, y=y, family='poisson', alpha=.5)

# gam fit
g <- gam(y~lo(x[,1], span=.5)+lo(x[,2], span=.5), family=poisson)

# show the data and the robust gam fit
# points above the predicted mean surface are red
# points below the predicted mean surface are gray
#fi.rg <- a$predict
fi.rg <- predict(a, type='response')

pp <- persp(x1, x2, matrix(fi.rg, n, n), theta=150, phi=10, 
zlim=c(0, max(y)+5), col='lightblue', shade=.75, main='Robust GAM fit', sub='gray points are below the surface')
vv <- (y - fi.rg) > 0
points(trans3d(x[,1], x[,2], y, pmat=pp), pch=19, col=c('lightgray', 'red')[as.numeric(vv)+1])

# show the data and the gam fit
# points above the predicted mean surface are red
# points below the predicted mean surface are gray
fi.g <- predict(g, type='response')
pp2 <- persp(x1, x2, matrix(predict(g, type='response'), n, n), theta=150, phi=10, 
zlim=c(0, max(y)+5), col='lightblue', shade=.75, main='Classical GAM fit', sub='gray points are below the surface')
vv <- (y - fi.g) > 0
points(trans3d(x[,1], x[,2], y, pmat=pp2), pch=19, col=c('lightgray','red')[as.numeric(vv)+1])


fi.rg <- predict(a, type='response')
fi.g <- predict(g, type='response')

# plot the residuals for both fits
# now how the residuals of the gam fit are severely
# affected by the outliers
par(mfrow=c(1,2))
plot(fi.rg, (y - fi.rg), ylab='Residuals robust gam', xlab='Fitted values', pch=19, col='gray', ylim=c(-200, 550))
abline(h=0, lwd=3)
plot(fi.g, (y-fi.g), ylab='Residuals gam', xlab='Fitted values', pch=19, col='gray', ylim=c(-200, 550))
abline(h=0, lwd=3)
par(mfrow=c(1,1))