library(fields) ## THIS FILE HAS TWO FUNCTIONS: "my.spline" and "my.cv" ## AND A CROSS-VALIDATION EXAMPLE my.spline = function(x,y,alpha) { ## INPUT: ## x = numerical vector of dimension n. It's values should be sorted in increasing order (explanatory variable) ## y = numerical vector of dimension n (the values of the response variable) ## alpha = numerical value (between 0 and 1) ## OTPUT: ## s.hat the result of the spline fit (for each given value of x) n=length(x) Delta = x[2:n]-x[1:(n-1)] R = matrix(0,(n-2),(n-2)) for(i in 1:(n-2)){ if(i ==1) {R[i,i] = 2*(Delta[i]+Delta[i+1]); R[i,i+1]=Delta[i+1]} if(i >1 & i < n-2){ R[i,i-1] = Delta[i]; R[i,i] = 2*(Delta[i] + Delta[i+1]); R[i,i+1] = Delta[i+1] } if(i == (n-2)) {R[i,i-1] = Delta[i]; R[i,i] = 2*(Delta[i] + Delta[i+1])} } R=(1/6)*R Q = matrix(0,(n-2),n) Delta_inv=1/Delta for(i in 1:(n-2)){ Q[i,i] = Delta_inv[i];Q[i,i+1] = -(Delta_inv[i]+Delta_inv[i+1]); Q[i,i+2]=Delta_inv[i+1] } s.hat = solve((alpha*diag(n) + (1-alpha)*t(Q)%*%solve(R)%*%Q))%*%(alpha*y) #return(list(fitted=s.hat)) return(s.hat) } my.cv = function(x,y,alpha) { ## INPUT: ## x = real vector. It's values should be sorted in increasing order (explanatory variable) ## y = real vector (the values of the response variable) ## alpha = real vector of dim k (values of alpha to be tried in the CV comparison) ## OTPUT: ## res = real matrix of dimension k x 2, first column = alpha, secon column = CV results nn=length(x) mm=length(alpha) res=matrix(rep(0,mm*2),ncol=2) for(i in 1:mm){ alpha0=alpha[i] res[i,1]=alpha0 for(j in 1:nn){ xx=x[-j] yy=y[-j] res0=my.spline(xx,yy,alpha=alpha0) res[i,2]=res[i,2]+( y[j]-splint(xx,res0,x[j]))^2 } } return(res) } ### CROSS-VALIDATION #### ### USES THE FUNCTION my.cv( x , y, alpha.cv ) library(fields) set.seed(7) ## Creating the dataset and looking at the data: x=-.5+sort(runif(100)) y = 20*x+30*sin(3*pi*x) +10*rnorm(length(x)) plot(x,y) ## Creating a set of values of "alpha" for the CV comparison alpha.cv = c( seq(0.1,0.9,0.1),1-1e-2,1-1e-3,1-1e-4,1-1e-5,1-1e-6 ) ## Running the CV comparison and looking at the results: cv.res=my.cv(x,y,alpha.cv) cv.res ## Refining the "alpha grid" and running a new round of CV Comparisons alpha.cv=seq( 0.999,0.99999,5e-5) cv.res=my.cv(x,y,alpha.cv) cv.res ## Plotting the "best fit" s.hat=my.spline(x,y,alpha= 0.9999) x.grid=+seq(-0.5,0.5,.01) res=splint(x,s.hat,x.grid) lines(x.grid,res,col=2,lwd=3)