# rgam public example #source('rgam-public.R') library(rgam) library(gam) # # EXAMPLE 1 # # create data set.seed(33) n <- 80 k <- 4 n1 <- 25 x <- 1:n la <- exp( sin(2*x/120) + cos(7*x/60) + 1) y <- rpois(n, lambda=la) # add outliers x2 <- x[(n1-k):n1] + .5 la2 <- exp( sin(2*x2/120) + cos(7*x2/60) + 1) y2 <- rpois(k+1, lambda= la2) + 20 x <- c(x, x2) y <- c(y, y2) la <- c(la, la2) # plot data plot(x,y) # rgam fit - use robust cross validation to select the span # a <- rgam(x=x, y=y, family='poisson', span='rcv', alphas=c(.3, .35, .4, .45, .5, .55, .6)) a <- rgam(x=x, y=y, family='poisson', cv.method='rcv', alpha=c(.3, .35, .4, .45, .5, .55, .6)) # gam fit - use a span that produces a similar fit to that of rgam where no outliers are # present b <- gam(y~lo(x, span=.3), family=poisson) # show the fits and the true mean function plot(x,y, main='Comparison of gam and robust gam fits - Poisson') lines(sort(x), la[order(x)], lwd=3, col='gray') lines(sort(x), predict(a, type='response')[order(x)], lwd=3, col='pink') lines(sort(x), predict(b, type='response')[order(x)], col='lightblue', lwd=3) legend(65, 20, legend=c('true', 'gam', 'rgam'), lwd=3, col=c('gray', 'lightblue', 'pink')) # # EXAMPLE 2 # # create data - Binomial response x <- as.vector(1:100) n <- length(x) nu <- cos(5*x/120)/.8 pri <- exp(nu) / (1 + exp(nu)) ni <- rep(10,n) set.seed(31) y <- rbinom(n, size=ni, prob=pri) # add outliers e <- rbinom(n=11, size=1, prob=.25) y[75:85] <- (1-e)*y[75:85] + e*ni[75:85] # plot data plot(x,y) # rgam fit - use robust cross validation to select the span # a <- rgam(x=x, y=y, ni=ni, family='binomial', span='rcv', alphas=c(.3, .35, .4, .45, .5)) a <- rgam(x=x, y=y, ni=ni, family='binomial', cv.method='rcv', alpha=c(.3, .35, .4, .45, .5)) # gam fit - use a span that produces a similar fit to that of rgam where no outliers are # present b <- gam(cbind(y, ni-y)~lo(x, span=.3), family=binomial) # show the fits and the true mean function plot(x,y, main='Comparison of gam and robust gam fits - Binomial') lines(sort(x), (ni*predict(a,type='response'))[order(x)], lwd=3, col='pink') lines(sort(x), (ni*pri)[order(x)], lwd=3, col='gray') lines(sort(x), (ni*predict(b, type='response'))[order(x)], col='lightblue', lwd=3) legend(80, 9, legend=c('true', 'gam', 'rgam'), lwd=3, col=c('gray', 'lightblue', 'pink')) # # EXAMPLE 3 # # create data - two covariates - Poisson response set.seed(31) n <- 20 x1 <- sort(runif(20)) x2 <- sort(runif(20)) x <- expand.grid(x1, x2) #x <- x + matrix(rnorm(n*n*2, sd=.01), n*n, 2) hh <- function(a) return( exp(3*sin(a[1]*pi*5/4) + 3*cos(a[2]*pi/2)) ) mu <- apply(x, 1, hh) # outliers b <- rbinom(n*n, size=1, prob=.15) y <- rpois(n*n, lambda=mu)*(1-b) + b*rpois(n*n, lambda=mu+500) # show the data and the true mean surface pp <- persp(x1, x2, matrix(mu, n, n), theta=150, phi=10, zlim=c(0, max(y)+5), col='lightblue', shade=.75, main='True mean function', sub='gray points are below the surface')# vv <- (y - mu) > 0 points(trans3d(x[,1], x[,2], y, pmat=pp), pch=19, col=c('lightgray','red')[as.numeric(vv)+1]) # rgam fit - cross-validation is not currently implemented for more # than one covariate a <- rgam(x=x, y=y, family='poisson', alpha=.5) # gam fit g <- gam(y~lo(x[,1], span=.5)+lo(x[,2], span=.5), family=poisson) # show the data and the robust gam fit # points above the predicted mean surface are red # points below the predicted mean surface are gray #fi.rg <- a$predict fi.rg <- predict(a, type='response') pp <- persp(x1, x2, matrix(fi.rg, n, n), theta=150, phi=10, zlim=c(0, max(y)+5), col='lightblue', shade=.75, main='Robust GAM fit', sub='gray points are below the surface') vv <- (y - fi.rg) > 0 points(trans3d(x[,1], x[,2], y, pmat=pp), pch=19, col=c('lightgray', 'red')[as.numeric(vv)+1]) # show the data and the gam fit # points above the predicted mean surface are red # points below the predicted mean surface are gray fi.g <- predict(g, type='response') pp2 <- persp(x1, x2, matrix(predict(g, type='response'), n, n), theta=150, phi=10, zlim=c(0, max(y)+5), col='lightblue', shade=.75, main='Classical GAM fit', sub='gray points are below the surface') vv <- (y - fi.g) > 0 points(trans3d(x[,1], x[,2], y, pmat=pp2), pch=19, col=c('lightgray','red')[as.numeric(vv)+1]) fi.rg <- predict(a, type='response') fi.g <- predict(g, type='response') # plot the residuals for both fits # now how the residuals of the gam fit are severely # affected by the outliers par(mfrow=c(1,2)) plot(fi.rg, (y - fi.rg), ylab='Residuals robust gam', xlab='Fitted values', pch=19, col='gray', ylim=c(-200, 550)) abline(h=0, lwd=3) plot(fi.g, (y-fi.g), ylab='Residuals gam', xlab='Fitted values', pch=19, col='gray', ylim=c(-200, 550)) abline(h=0, lwd=3) par(mfrow=c(1,1))