## ## file: Lab2.R ## ## Author: B. M. Bolstad ## Written: Mar 3, 2004 ## ## Aim: Some code that would be useful for the analysis of Lab2 ## Note that many more things are possible ## postscript("lab2.ps") bonding <- read.table("bonding.dat",header=FALSE) names(bonding) <- c("S","F","P","T","D") my.fit <- lm(S ~ F + P + T + D, data=bonding) summary(my.fit) anova(my.fit) par(mfrow=c(2,2)) plot(my.fit) par(mfrow=c(1,1)) qqnorm(resid(my.fit)) qqline(resid(my.fit)) ### Some simulated qqplots par(mfrow=c(3,3)) for (i in 1:9){ x <- rnorm(30) qqnorm(x) qqline(x) } par(mfrow=c(2,2)) plot(bonding[,"F"],resid(my.fit),xlab="F",ylab="residuals",main="Residuals vs F") plot(bonding[,"P"],resid(my.fit),xlab="P",ylab="residuals",main="Residuals vs P") plot(bonding[,"T"],resid(my.fit),xlab="T",ylab="residuals",main="Residuals vs T") plot(bonding[,"D"],resid(my.fit),xlab="D",ylab="residuals",main="Residuals vs D") ### ### Some tests using the ANOVA. ### first we will do it conventionally ### ie H: submodel A: full model ### my.fit.restrict1 <- lm(S ~ F + P + T, data=bonding) anova(my.fit.restrict1,my.fit) FP <- bonding[,"F"] + bonding[,"P"] my.fit.restrict2 <- lm(S ~ FP + T + D, data=bonding) anova(my.fit.restrict2,my.fit) ### ### suppose we have a linear combination P'\beta ### then it can be shown that ### P'(\hat \beta - \beta)/(s\sqrt(P'(X'X)^(-1)P)) ### has t distribution with n-p degrees of freedom ### ### ### A 95% CI for 2\beta_1 +\beta_2 + 4\beta_3 ### will be given by ### 2 \hat \beta_1 + \hat \beta_2 + 4 \hat \beta_3 ### +/- t_{n-p}^(1-\alpha/2) * s * sqrt([0, 2, 1, 4,0] (X'X)^(-1)[0, 2, 1, 4,0]') ### P <- c(0,2,1,4,0) X <- as.matrix(cbind(1, bonding[,2:5])) lower.end <- sum(P*my.fit$coef) - qt(0.975,30-5)*summary(my.fit)$s* sqrt(t(P) %*% solve(t(X)%*%X) %*% P) upper.end <- sum(P*my.fit$coef) + qt(0.975,30-5)*summary(my.fit)$s* sqrt(t(P) %*% solve(t(X)%*%X) %*% P) c(lower.end,upper.end) ### ### Simultaneous CI using Scheffe ### ### ### ### P <- c(0,0,0,0,1) Scheffe.interval1.low <- t(P) %*% my.fit$coef - summary(my.fit)$s*sqrt(3*qf(0.95,3,25)) * sqrt(t(P) %*% solve(t(X)%*%X) %*% P) Scheffe.interval1.high <- t(P) %*% my.fit$coef + summary(my.fit)$s*sqrt(3*qf(0.95,3,25)) * sqrt(t(P) %*% solve(t(X)%*%X) %*% P) P <- c(0,2,0,1,0) Scheffe.interval2.low <- t(P) %*% my.fit$coef - summary(my.fit)$s*sqrt(3*qf(0.95,3,25)) * sqrt(t(P) %*% solve(t(X)%*%X) %*% P) Scheffe.interval2.high <- t(P) %*% my.fit$coef + summary(my.fit)$s*sqrt(3*qf(0.95,3,25)) * sqrt(t(P) %*% solve(t(X)%*%X) %*% P) c(Scheffe.interval1.low,Scheffe.interval1.high) c(Scheffe.interval2.low,Scheffe.interval2.high) ### ### Simultaneous CI using Bonferroni ### ### ### ### P <- c(0,0,0,0,1) Bonferroni.interval1.low <- t(P) %*% my.fit$coef - summary(my.fit)$s*qt(1-0.05/(4),25) * sqrt(t(P) %*% solve(t(X)%*%X) %*% P) Bonferroni.interval1.high <- t(P) %*% my.fit$coef + summary(my.fit)$s*qt(1-0.05/(4),25) * sqrt(t(P) %*% solve(t(X)%*%X) %*% P) P <- c(0,2,0,1,0) Bonferroni.interval2.low <- t(P) %*% my.fit$coef - summary(my.fit)$s*qt(1-0.05/(4),25) * sqrt(t(P) %*% solve(t(X)%*%X) %*% P) Bonferroni.interval2.high <- t(P) %*% my.fit$coef + summary(my.fit)$s*qt(1-0.05/(4),25) * sqrt(t(P) %*% solve(t(X)%*%X) %*% P) c(Bonferroni.interval1.low,Bonferroni.interval1.high) c(Bonferroni.interval2.low,Bonferroni.interval2.high) ### ### Just for comparison purposes CI without taking simultaneity into account ### P <- c(0,0,0,0,1) interval1.low <- sum(P*my.fit$coef) - qt(0.975,30-5)*summary(my.fit)$s* sqrt(t(P) %*% solve(t(X)%*%X) %*% P) interval1.high <- sum(P*my.fit$coef) + qt(0.975,30-5)*summary(my.fit)$s* sqrt(t(P) %*% solve(t(X)%*%X) %*% P) P <- c(0,2,0,1,0) interval2.low <- sum(P*my.fit$coef) - qt(0.975,30-5)*summary(my.fit)$s* sqrt(t(P) %*% solve(t(X)%*%X) %*% P) interval2.high <- sum(P*my.fit$coef) + qt(0.975,30-5)*summary(my.fit)$s* sqrt(t(P) %*% solve(t(X)%*%X) %*% P) c(interval1.low,interval1.high) c(interval2.low,interval2.high) ### ### Now lets do the ellipse ### first the code ### ### my.ellipse <- function(Q,x0,k){ v1 <- eigen(Q)$vec[,1] v2 <- eigen(Q)$vec[,2] e1 <- sqrt(1/eigen(Q)$val[1]*k) e2 <- sqrt(1/eigen(Q)$val[2]*k) theta <- (0:100)*2*pi/100 x <- e1*v1[1]*cos(theta) + e2*v2[1]*sin(theta) + x0[1] y <- e1*v1[2]*cos(theta) + e2*v2[2]*sin(theta) + x0[2] cbind(x,y) } ### ### Now the parameters ### C <- t(matrix(c(0,0,0,0,1,0,2,0,1,0),ncol=2,nrow=5)) x0 <- C%*%my.fit$coef Q <- solve(C %*% solve(t(X)%*%X) %*% t(C)) k <- summary(my.fit)$s^2* qr(C)$rank*qf(0.95,2,25) par(mfrow=c(1,1)) plot(my.ellipse(Q,x0,k),type="l",xlim=c(-0.4,1),ylim=c(-0.6,1.82), xlab="beta4",ylab="2beta1 + beta3",main="Confidence ellipse for beta4 and 2beta1 + beta3",lwd=2,col="black") plot(my.ellipse(Q,x0,k),type="l",xlim=c(-0.4,1.5),ylim=c(-0.8,2.0), xlab="beta4",ylab="2beta1 + beta3",main="Confidence Methods for beta4 and 2beta1 + beta3",lwd=2,col="black") lines(c(Scheffe.interval1.low,Scheffe.interval1.low),c(-10,10),lty=2,lwd=3,col="green") lines(c(Scheffe.interval1.high,Scheffe.interval1.high),c(-10,10),lty=2,lwd=3,col="green") lines(c(-10,10),c(Scheffe.interval2.low,Scheffe.interval2.low),lty=2,lwd=3,col="green") lines(c(-10,10),c(Scheffe.interval2.high,Scheffe.interval2.high),lty=2,lwd=3,col="green") lines(c(Bonferroni.interval1.low,Bonferroni.interval1.low),c(-10,10),lty=3,lwd=3,col="blue") lines(c(Bonferroni.interval1.high,Bonferroni.interval1.high),c(-10,10),lty=3,lwd=3,col="blue") lines(c(-10,10),c(Bonferroni.interval2.low,Bonferroni.interval2.low),lty=3,lwd=3,col="blue") lines(c(-10,10),c(Bonferroni.interval2.high,Bonferroni.interval2.high),lty=3,lwd=3,col="blue") lines(c(interval1.low,interval1.low),c(-10,10),lty=4,lwd=3,col="red") lines(c(interval1.high,interval1.high),c(-10,10),lty=4,lwd=3,col="red") lines(c(-10,10),c(interval2.low,interval2.low),lty=4,lwd=3,col="red") lines(c(-10,10),c(interval2.high,interval2.high),lty=4,lwd=3,col="red") legend(1.0,1.0,c("Ellipse","Std","Bonferroni","Scheffe"),lty=c(1,4,3,2),lwd=2,col=c("black","red","blue","green"),cex=1.5) ### ### ### lets run through the model selection procedure ### ### F2 <- bonding[,"F"]^2 P2 <- bonding[,"P"]^2 T2 <- bonding[,"T"]^2 D2 <- bonding[,"D"]^2 Fullmodel.fit <- lm (S ~ F + P + T + D + F:P + F:T + F:D + P:T + P:D + T:D + F2 + P2 + T2 + D2 , data=bonding) summary(Fullmodel.fit) # remove T:D Reducedmodel.fit <- lm (S ~ F + P + T + D + F:P + F:T + F:D + P:T + P:D + F2 + P2 + T2 + D2 , data=bonding) summary(Reducedmodel.fit) Reducedmodel.fit <- lm (S ~ F + P + T + D + F:P + F:T + F:D + P:T + P:D + F2 + P2 + D2 , data=bonding) summary(Reducedmodel.fit) Reducedmodel.fit <- lm (S ~ F + P + T + D + F:P + F:T + F:D + P:T + P:D + F2 + P2 , data=bonding) summary(Reducedmodel.fit) Reducedmodel.fit <- lm (S ~ F + P + T + D + F:P + F:T + F:D + P:T + P:D , data=bonding) summary(Reducedmodel.fit) Reducedmodel.fit <- lm (S ~ F + P + T + D + F:P + F:D + P:T + P:D , data=bonding) summary(Reducedmodel.fit) Reducedmodel.fit <- lm (S ~ F + P + T + D + F:P + F:D + P:D , data=bonding) summary(Reducedmodel.fit) Reducedmodel.fit <- lm (S ~ F + P + T + D + F:P + F:D , data=bonding) summary(Reducedmodel.fit) Reducedmodel.fit <- lm (S ~ F + P + T + D + F:D , data=bonding) summary(Reducedmodel.fit) Reducedmodel.fit <- lm (S ~ F + P + T + D , data=bonding) summary(Reducedmodel.fit) Reducedmodel.fit <- lm (S ~ P + T + D , data=bonding) summary(Reducedmodel.fit) Reducedmodel.fit <- lm (S ~ P + T, data=bonding) summary(Reducedmodel.fit) ## just for fun lets test the reduced model versus the full model anova(Reducedmodel.fit,Fullmodel.fit) # the following plot suggests a designed experiment on an orthogonal basis pairs(bonding[,2:5]) ### ### ### Lets fit it as an ANOVA model ### ### my.fit.anova <- lm(S ~ as.factor(F)+ as.factor(P) + as.factor(T) + as.factor(D), data=bonding) summary(my.fit.anova) anova(my.fit.anova) dev.off()