source("http://www.uvm.edu/~rsingle/stat221/data/scripts-221.R") dat <- otherdata("gas.dat") dat$STATE <- as.character(dat$STATE) attach(dat) #variables are now in the search() path # TAX = motor fuel tax rate (in cents per gallon) # ROAD = number of miles of federal-aid primary highways (in thousands) # PLIC = percentage of licensed drivers # INC = per capita income (in thousands of dollars) # FUEL = per capita fuel consumption (in gallons per person) # STATE= two letter state code f.t <- lm(FUEL~TAX) anova(f.t) mse <- 10189.28 #plot Cook's distances plot(1:48,cooks.distance(f.t),type="n") text(1:48,cooks.distance(f.t),STATE) #Visualize change in regression slope due to obs #40 plot(TAX,FUEL) abline((lm(FUEL ~TAX) )$coef, col=1) abline((lm(FUEL[-40]~TAX[-40]))$coef, col=2) #Compute Euclidean distance between Beta.hat and Beta.hat[-40] #in 3 different ways summary(lm(FUEL ~TAX) )$coef summary(lm(FUEL[-40]~TAX[-40]))$coef #1--- (983.75 - 931.78)^2 + (-53.08 - -47.30)^2 #2--- diff <- lm(FUEL~TAX)$coef - lm(FUEL[-40]~TAX[-40])$coef sum(diff^2) #3--- t(diff) %*% diff #Compute standardized difference between Beta.hat and Beta.hat[-40] #in 3 different ways #1--- cooks.distance(f.t)[40] #2--- X <- cbind( seq(1,1,length=48), TAX ) head(X) XtX <- t(X) %*% X ( t(diff) %*% XtX %*% diff ) / (2*mse) #3--- r.i <- rstandard(f.t) #internally studentized residuals h.i <- hatvalues(f.t) #leverage or "hat" values ( r.i[40]^2 * h.i[40] )/( (1+1) * (1 - h.i[40]) ) #Assess importance pf(.2077,1,46) #0.3492797 #Detach the data detach(dat)