# Splus code for lab6 # note still a few problems with code in R (diag in IRLS) # code written by bmb 18-4-2000 19-4-2000 #readin the data ICU<- read.table("ICU.dat") names(ICU) <- c("ID","STA","AGE","SEX","RACE","SER","CAN","CRN","INF","CPR","SYS","HRA","PRE","TYPE","FRA","P02","PH","PCO","BIC","CRE","LOC") #recode as appropriate ICU$LOC[ICU$LOC ==2] <-1 ICU$RACE[ICU$RACE ==3] <- 1 #fit the first logistic model fit <- glm(STA ~ AGE+SER+CAN,family=binomial,data=ICU) summary(fit) #a function to implement Iteratively Reweighted Least Squares IRLSit <- function(X,Y,beta0,its){ for (i in 1:its){ theta <- X%*%beta0 pihat <- exp(theta)/(1+exp(theta)) W <- diag(pihat*(1-pihat),200) V <- solve(t(X)%*%W%*%X) Z <- V%*%t(X)%*%(Y-pihat) beta0 <- beta0 + Z print(beta0) } print(V) } # create the data matrix and response matrix then perform the IRLS algorithm X <- cbind(rep(1,200),ICU$AGE,ICU$SER,ICU$CAN) Y <- ICU$STA IRLSit(X,Y,rbind(0,0,0,0),4) # functions to add the p-values to the output from summary.glm() # call like addpvals(summary(fit)$coef) pvalue <- function(t) { p <- 2*(1-pnorm(abs(t))) p } addpvals <- function(coef){ p <- pvalue(coef[,3]) cbind(coef,p) } #stepwise GLM # basically repeat process deleting out the "non" significant parameters stepfit <- glm(STA ~ AGE + SEX + RACE + SER + CAN +CRN + INF + CPR +SYS + HRA + PRE + TYPE + FRA + P02 + PH +PCO + BIC + CRE + LOC,family=binomial,data=ICU) addpvals(summary(stepfit)$coef) # after using the stepwise deletion and then addition procedures should have some final model #finalfit <- glm(STA ~ .......) # get fitted values for pi(x_i) fittedvalues <- predict(finalfit,type="response") #bind these fitted values to the data matrix datafit <- cbind(ICU,fittedvalues) #sort matrix matrixorder <- order(fittedvalues) datafit <- datafit[matrixorder,] #examine the ten highest risk patients #note you should be more efficient and extract only the columns containing #information you used in your final fitted model datafit[191:200,] #now plot your final fitted model as a function of age #note that the model I provide here is by no means the correct model #that you should have at this stage ie you should follow the procedure #outlined in the lab to get the model finalmodel <- glm(STA ~ AGE + LOC,family=binomial, data=ICU) newdat <- as.data.frame(cbind(1:100,rep(0,100))) newdat1 <- as.data.frame(cbind(1:100,rep(1,100))) names(newdat) <- c("AGE","LOC") names(newdat1) <- c("AGE","LOC") p1 <- predict(finalmodel,newdat,type="response") p2 <- predict(finalmodel,newdat1,type="response") plot(1:100,p2,type="n",ylim=c(0,1)) lines(1:100,p1,lty=1) lines(1:100,p2,lty=3) #now you have to get a 95 percent confidence interval for r.r. # this should be easy enough