% Robust-statistics publications, 2004-2011 (reference-database export, cleaned).
% Conventions used in this block:
%   - citation keys are the exporter's record identifiers and are left untouched;
%   - month uses the standard three-letter macros; an issue day, where the
%     export carried one, is appended by string concatenation;
%   - page ranges use a double hyphen;
%   - tokens in titles that must keep their case are brace-protected.

% --- 2011 ---

@article{ISI:000293113300030,
  author    = {Alimadad, Azadeh and Salibian-Barrera, Mat{\'\i}as},
  title     = {An Outlier-Robust Fit for Generalized Additive Models With Applications to Disease Outbreak Detection},
  journal   = {Journal of the American Statistical Association},
  volume    = {106},
  number    = {494},
  year      = {2011},
  month     = jun,
  pages     = {719--731},
  publisher = {AMER STATISTICAL ASSOC},
  type      = {Article},
  address   = {732 N WASHINGTON ST, ALEXANDRIA, VA 22314-1943 USA},
  abstract  = {We are interested in a class of unsupervised methods to detect possible disease outbreaks, that is, rapid increases in the number of cases of a particular disease that deviate from the pattern observed in the past. The motivating application for this article deals with detecting outbreaks using generalized additive models (GAMs) to model weekly counts of certain infectious diseases. We can use the distance between the predicted and observed counts for a specific week to determine whether an important departure has occurred. Unfortunately, this approach may not work as desired because GAMs can be very sensitive to the presence of a small proportion of observations that deviate from the assumed model. Thus, the outbreak may affect the predicted values causing these to be close to the atypical counts, and thus mask the outliers by having them appear not to be too extreme or atypical. We illustrate this phenomenon with influenza-like-illness doctor-visits data from the United States for the 2006-2008 flu seasons. One way to avoid this masking problem is to derive an algorithm to fit GAM models that can resist the effect of a small number of atypical observations. In this article we discuss such an outlier-robust fit for GAMs based on the backfitting algorithm. The basic idea is to replace the maximum likelihood based weights used in the generalized local scoring algorithm with those derived from robust quasi-likelihood equations (Cantoni and Ronchetti 2001b). These robust estimators for generalized linear models work well for the Poisson family of distributions, and also for binomial distributions with relatively large numbers of trials. We show that the resulting estimated mean function is resistant to the presence of outliers in the response variable and that it also remains close to the usual GAM estimator when the data do not contain atypical observations. We illustrate the use of this approach on the detection of the recent outbreak of H1N1 flu by looking at the weekly counts of influenza-like-illness (ILI) doctor visits, as reported through the U.S. Outpatient Influenza-like Illness Surveillance Network (ILINet), and also apply our method to the numbers of requested isolates in Canada. Weeks with a sudden increase in ILI visits or requested isolates are much more clearly identified as atypical by the robust fit because the observed counts are far from the ones predicted by the fitted GAM model.},
  keywords  = {Outliers, Robust quasi-likelihood, Robustness},
  issn      = {0162-1459},
  doi       = {10.1198/jasa.2011.tm09654},
}

% --- 2010 ---

@article{ISI:000281333900020,
  author    = {Khan, Jafar A. and Van Aelst, Stefan and Zamar, Ruben H.},
  title     = {Fast robust estimation of prediction error based on resampling},
  journal   = {Computational Statistics \& Data Analysis},
  volume    = {54},
  number    = {12, SI},
  year      = {2010},
  month     = dec # "~1",
  pages     = {3121--3130},
  publisher = {ELSEVIER SCIENCE BV},
  type      = {Article},
  address   = {PO BOX 211, 1000 AE AMSTERDAM, NETHERLANDS},
  abstract  = {Robust estimators of the prediction error of a linear model are proposed. The estimators are based on the resampling techniques cross-validation and bootstrap. The robustness of the prediction error estimators is obtained by robustly estimating the regression parameters of the linear model and by trimming the largest prediction errors. To avoid the recalculation of time-consuming robust regression estimates, fast approximations for the robust estimates of the resampled data are used. This leads to time-efficient and robust estimators of prediction error. (C) 2010 Elsevier B.V. All rights reserved.},
  keywords  = {bootstrap, Cross-validation, Prediction error, Robustness},
  issn      = {0167-9473},
  doi       = {10.1016/j.csda.2010.01.031},
}

@article{ISI:000280072500005,
  author    = {Omelka, Marek and Salibian-Barrera, Mat{\'\i}as},
  title     = {Uniform asymptotics for {S}- and {MM}-regression estimators},
  journal   = {Annals of the Institute of Statistical Mathematics},
  volume    = {62},
  number    = {5},
  year      = {2010},
  month     = oct,
  pages     = {897--927},
  publisher = {SPRINGER HEIDELBERG},
  type      = {Article},
  address   = {TIERGARTENSTRASSE 17, D-69121 HEIDELBERG, GERMANY},
  abstract  = {In this paper we find verifiable regularity conditions to ensure that S-estimators of scale and regression and MM-estimators of regression are uniformly consistent and uniformly asymptotically normally distributed over contamination neighbourhoods. Moreover, we show how to calculate the size of these neighbourhoods. In particular, we find that, for MM-estimators computed with Tukey{\textquoteright}s family of bisquare score functions, there is a trade-off between the size of these neighbourhoods and both the breakdown point of the S-estimators and the leverage of the contamination that is allowed in the neighbourhood. These results extend previous work of Salibian-Barrera and Zamar for location-scale to the linear regression model.},
  keywords  = {Robust inference, Robust regression, Robustness, Uniform asymptotics},
  issn      = {0020-3157},
  doi       = {10.1007/s10463-008-0189-x},
}

% --- 2009 ---

@article{ISI:000263129000011,
  author    = {Alqallaf, Fatemah and Van Aelst, Stefan and Yohai, Victor J. and Zamar, Ruben H.},
  title     = {Propagation of Outliers in Multivariate Data},
  journal   = {Annals of Statistics},
  volume    = {37},
  number    = {1},
  year      = {2009},
  month     = feb,
  pages     = {311--331},
  publisher = {INST MATHEMATICAL STATISTICS},
  type      = {Article},
  address   = {3163 SOMERSET DR, CLEVELAND, OH 44122 USA},
  abstract  = {We investigate the performance of robust estimates of multivariate location under nonstandard data contamination models such as componentwise outliers (i.e., contamination in each variable is independent from the other variables). This model brings up a possible new source of statistical error that we call {\textquoteleft}{\textquoteleft}propagation of outliers.{\textquoteright}{\textquoteright} This source of error is unusual in the sense that it is generated by the data processing itself and takes place after the data has been collected. We define and derive the influence function of robust multivariate location estimates under flexible contamination models and use it to investigate the effect of propagation of outliers. Furthermore, we show that standard high-breakdown affine equivariant estimators propagate outliers and therefore show poor breakdown behavior under componentwise contamination when the dimension d is high.},
  keywords  = {breakdown point, contamination model, independent contamination, influence function, Robustness},
  issn      = {0090-5364},
  doi       = {10.1214/07-AOS588},
}

% --- 2008 ---

@inproceedings{ISI:000259327700030,
  author       = {Van Aelst, Stefan and Khan, Jafar A. and Zamar, Ruben H.},
  editor       = {Brito, P.},
  title        = {Fast robust variable selection},
  booktitle    = {{COMPSTAT} 2008: Proceedings in Computational Statistics},
  year         = {2008},
  note         = {18th Symposium on Computational Statistics (COMPSTAT 2008), Oporto, Portugal, Aug 24--29, 2008},
  pages        = {359--370},
  publisher    = {Univ Porto, Fac Econ; PSE; FCT; FEUP; Banco Portugal; PORTO; SPM; Caixa Geral Depositos; SPE; CLAD; ifcs},
  organization = {Univ Porto, Fac Econ; PSE; FCT; FEUP; Banco Portugal; PORTO; SPM; Caixa Geral Depositos; SPE; CLAD; ifcs},
  type         = {Proceedings Paper},
  address      = {TIERGARTENSTR 17, D-69121 HEIDELBERG, GERMANY},
  abstract     = {We discuss some computationally efficient procedures for robust variable selection in linear regression. A key component in these procedures is the computation of robust correlations between pairs of variables. We show that the robust variable selection procedures can easily handle missing data under the assumption that data are missing completely at random.},
  keywords     = {correlation, missing data, Robustness, variable selection},
  isbn         = {978-3-7908-2083-6},
}

@article{ISI:000267403600007,
  author    = {Salibian-Barrera, Mat{\'\i}as and Wei, Ying},
  title     = {Weighted quantile regression with nonelliptically structured covariates},
  journal   = {Canadian Journal of Statistics-Revue Canadienne de Statistique},
  volume    = {36},
  number    = {4},
  year      = {2008},
  month     = dec,
  pages     = {595--611},
  publisher = {WILEY-BLACKWELL PUBLISHING, INC},
  type      = {Article},
  address   = {COMMERCE PLACE, 350 MAIN ST, MALDEN 02148, MA USA},
  abstract  = {Although quantile regression estimators are robust against low leverage observations with atypically large responses (Koenker \& Bassett 1978), they can be seriously affected by a few points that deviate from the majority of the sample covariates. This problem can be alleviated by downweighting observations with high leverage. Unfortunately, when the covariates are not elliptically distributed, Mahalanobis distances may not be able to correctly identify atypical points. In this paper the authors discuss the use of weights based on a new leverage measure constructed using Rosenblatt{\textquoteright}s multivariate transformation which is able to reflect nonelliptical structures in the covariate space. The resulting weighted estimators are consistent, asymptotically normal, and have a bounded influence function. In addition, the authors also discuss a selection criterion for choosing the downweighting scheme. They illustrate their approach with child growth data from Finland. Finally, their simulation studies suggest that this methodology has good finite-sample properties.},
  keywords  = {Nonparametric methods, quantile regression, Robustness},
  issn      = {0319-5724},
}

% --- 2006 ---

@article{ISI:000238028100013,
  author    = {Salibian-Barrera, Mat{\'\i}as},
  title     = {Bootstrapping {MM}-estimators for linear regression with fixed designs},
  journal   = {Statistics \& Probability Letters},
  volume    = {76},
  number    = {12},
  year      = {2006},
  month     = jul # "~1",
  pages     = {1287--1297},
  publisher = {ELSEVIER SCIENCE BV},
  type      = {Article},
  address   = {PO BOX 211, 1000 AE AMSTERDAM, NETHERLANDS},
  abstract  = {In this paper, I study the extension of the robust bootstrap [Salibian-Barrera, M., Zamar, R.H., 2002. Bootstrapping robust estimates of regression. Ann. Statist. 30, 556-582] to the case of fixed designs. The robust bootstrap is a computer-intensive inference method for robust regression estimators which is computationally simple (because we do not need to recompute the robust estimate with each bootstrap sample) and robust to the presence of outliers in the bootstrap samples. In this paper, I prove the consistency of this method for the case of non-random explanatory variables and illustrate its use on a real data set. Simulation results indicate that confidence intervals based on the robust bootstrap have good finite-sample coverage levels. (C) 2006 Elsevier B.V. All rights reserved.},
  keywords  = {bootstrap, fixed design, inference, linear regression, MM-estimators, Robustness},
  issn      = {0167-7152},
  doi       = {10.1016/j.spl.2006.01.008},
}

@article{ISI:000238044400008,
  author    = {Salibian-Barrera, Mat{\'\i}as and Yohai, Victor J.},
  title     = {A fast algorithm for {S}-regression estimates},
  journal   = {Journal of Computational and Graphical Statistics},
  volume    = {15},
  number    = {2},
  year      = {2006},
  month     = jun,
  pages     = {414--427},
  publisher = {AMER STATISTICAL ASSOC},
  type      = {Article},
  address   = {1429 DUKE ST, ALEXANDRIA, VA 22314 USA},
  abstract  = {Equivariant high-breakdown point regression estimates are computationally expensive, and the corresponding algorithms become unfeasible for moderately large number of regressors. One important advance to improve the computational speed of one such estimator is the fast-LTS algorithm. This article proposes an analogous algorithm for computing S-estimates. The new algorithm, that we call {\textquoteleft}{\textquoteleft}fast-S{\textquoteright}{\textquoteright}, is also based on a {\textquoteleft}{\textquoteleft}local improvement{\textquoteright}{\textquoteright} step of the resampling initial candidates. This allows for a substantial reduction of the number of candidates required to obtain a good approximation to the optimal solution. We performed a simulation study which shows that S-estimators computed with the fast-S algorithm compare favorably to the LTS-estimators computed with the fast-LTS algorithm.},
  keywords  = {high breakdown point, linear regression, Robustness},
  issn      = {1061-8600},
  doi       = {10.1198/106186006X113629},
}

@article{ISI:000240158700035,
  author    = {Salibian-Barrera, Mat{\'\i}as and Van Aelst, Stefan and Willems, Gert},
  title     = {Principal components analysis based on multivariate {MM} estimators with fast and robust bootstrap},
  journal   = {Journal of the American Statistical Association},
  volume    = {101},
  number    = {475},
  year      = {2006},
  month     = sep,
  pages     = {1198--1211},
  publisher = {AMER STATISTICAL ASSOC},
  type      = {Article},
  address   = {1429 DUKE ST, ALEXANDRIA, VA 22314 USA},
  abstract  = {We consider robust principal components analysis (PCA) based on multivariate MM estimators. We first study the robustness and efficiency of these estimators, particularly in terms of eigenvalues and eigenvectors. We then focus on inference procedures based on a fast and robust bootstrap for MM estimators. This method is an alternative to the approach based on the asymptotic distribution of the estimators and can also be used to assess the stability of the principal components. A formal consistency proof for the bootstrap method is given, and its finite-sample performance is investigated through simulations. We illustrate the use of the robust PCA and the bootstrap inference on a real dataset.},
  keywords  = {bootstrap, inference, MM-estimators, Principal components, Robustness},
  issn      = {0162-1459},
  doi       = {10.1198/016214506000000096},
}

@inproceedings{ISI:000242170000004,
  author       = {Pison, Greet and Van Aelst, Stefan and Zamar, Ruben H.},
  editor       = {Rizzi, A. and Vichi, M.},
  title        = {A robust linear grouping algorithm},
  booktitle    = {{COMPSTAT} 2006: Proceedings in Computational Statistics},
  year         = {2006},
  note         = {17th Symposium on Computational Statistics (COMPSTAT 2006), Rome, Italy, Aug 28--Sep 01, 2006},
  pages        = {43--53},
  publisher    = {PHYSICA-VERLAG GMBH \& CO},
  organization = {PHYSICA-VERLAG GMBH \& CO},
  type         = {Proceedings Paper},
  address      = {TIERGARTENSTR 17, D-69121 HEIDELBERG, GERMANY},
  abstract     = {Recently, an algorithm to detect groups in a dataset that follow different linear patterns was proposed in [VWZ06]. The algorithm is flexible in the sense that it does not require the specification of a response variable. On the other hand, the algorithm requires that each observation follows one of the linear patterns in the data. However, it often occurs in practice that part of the data does not follow any of the linear patterns. Therefore, we introduce a robust linear grouping algorithm based on trimming that can still find the linear structures even if part of the data does not belong to any of the groups.},
  keywords     = {linear grouping, Robustness, trimming},
  isbn         = {3-7908-1708-2},
  doi          = {10.1007/978-3-7908-1709-6_4},
}

% --- 2004 ---

@article{ISI:000186911900009,
  author    = {Adrover, J. and Salibian-Barrera, Mat{\'\i}as and Zamar, Ruben H.},
  title     = {Globally robust inference for the location and simple linear regression models},
  journal   = {Journal of Statistical Planning and Inference},
  volume    = {119},
  number    = {2},
  year      = {2004},
  month     = feb # "~1",
  pages     = {353--375},
  publisher = {ELSEVIER SCIENCE BV},
  type      = {Article},
  address   = {PO BOX 211, 1000 AE AMSTERDAM, NETHERLANDS},
  abstract  = {We define globally robust confidence intervals and p-values for the location and simple linear regression models. The need for robust inference has been noticed and partially addressed in the statistical literature (see for example the book by Barnett and Lewis, Outliers in Statistical Data, Wiley, New York, 1994 and references therein). We construct intervals that are stable in the sense of achieving coverages near the nominal ones even in the presence of outliers and other departures from the parametric model. Moreover, our intervals are informative in the sense of having relatively short lengths. These globally robust confidence intervals constitute an improvement over previous robust intervals which do not take into account the potential bias of the estimates. (C) 2002 Elsevier B.V. All rights reserved.},
  keywords  = {linear regression, maximum bias, robust confidence intervals, robust inference, Robustness},
  issn      = {0378-3758},
  doi       = {10.1016/S0378-3758(02)00490-1},
}

@article{ISI:000223519100004,
  author    = {Salibian-Barrera, Mat{\'\i}as and Zamar, Ruben H.},
  title     = {Uniform asymptotics for robust location estimates when the scale is unknown},
  journal   = {Annals of Statistics},
  volume    = {32},
  number    = {4},
  year      = {2004},
  month     = aug,
  pages     = {1434--1447},
  publisher = {INST MATHEMATICAL STATISTICS},
  type      = {Article},
  address   = {PO BOX 22718, BEACHWOOD, OH 44122 USA},
  abstract  = {Most asymptotic results for robust estimates rely on regularity conditions that are difficult to verify in practice. Moreover, these results apply to fixed distribution functions. In the robustness context the distribution of the data remains largely unspecified and hence results that hold uniformly over a set of possible distribution functions are of theoretical and practical interest. Also, it is desirable to be able to determine the size of the set of distribution functions where the uniform properties hold. In this paper we study the problem of obtaining verifiable regularity conditions that suffice to yield uniform consistency and uniform asymptotic normality for location robust estimates when the scale of the errors is unknown. We study M-location estimates calculated with an S-scale and we obtain uniform asymptotic results over contamination neighborhoods. Moreover, we show how to calculate the maximum size of the contamination neighborhoods where these uniform results hold. There is a trade-off between the size of these neighborhoods and the breakdown point of the scale estimate.},
  keywords  = {M-estimates, Robust inference, robust location and scale models, Robustness},
  issn      = {0090-5364},
  doi       = {10.1214/009053604000000544},
}