################################################################################### ### ### This file contains analysis code used for the poster ### Effects of Pre-processing on Expression Estimates: Background and Normalization ### Presented August 2003 by Ben Bolstad, Biostatistics, University of California, Berkeley ### ### This code was used with R-1.7.1, affy (version 1.3.6), AffyExtensions (Version 0.5-11) ### Other packages are generally Bioconductor 1.2 release. ### ### It may or may not work with different versions of the software. No guarantees that it ### will work correctly on your machine are promised or implied. ### ### This file was Last edited on August 1, 2003 By Ben Bolstad ### ################################################################################### ### Load required R packages. library(AffyExtensions) ### Load the data from an R data file ### Usually you'd use ReadAffy() here ### the R object also contains a data.frame ### with chip names, spike-in concentrations load("~/DataInRDA/AffySpikeinU95A.RData") ### ### computing the expression measures for the ### background comparison ### eset.nobg <- threestep(Data,background=FALSE) eset.rma <- threestep(Data) eset.mas5 <- threestep(Data,background.method="MAS") eset.imm <- threestep(Data,background.method="IdealMM") eset.mas5imm <- threestep(Data,background.method="MASIM") ### is.spikein <- is.element(row.names(exprs(eset.nobg)),probeset.names) not.spikein <- !is.spikein ordering <- order(row.names(t(log2(exp.setup[,2:15])))) ### ### This is the code that does the standard curve adjustment ### we will apply it after expression computation for speed here ### slope <- callPA(Data,use.median=FALSE,difference.model=FALSE) slope <- slope$PA.stat get.SCAB <- function(X,Y){ X <- as.vector(X) Y <- as.vector(Y) X[X == -Inf] <- -3 X.notinf <- X#[X != -Inf] Y.notinf <- Y#[X != -Inf] plot(X.notinf,Y.notinf,xlab="Concentration",ylab="Observed Expression") lo.fit <- lowess(X.notinf,Y.notinf,f=0.3) lines(lo.fit) intercept <- approx(lo.fit,xout=10)$y - 10 abline(intercept,1) fitted <- approx(lo.fit,xout=seq(-3,10,by=0.2)) plot(fitted$x,(fitted$y - (intercept + fitted$x)),xlab="Concentration",ylab="Adjustment") lines(lowess(fitted$x, (fitted$y - (intercept + fitted$x)),f=0.1)) SCAB <- lowess(fitted$x,(fitted$y - (intercept + fitted$x)),f=0.1) x.extra <- -6 y.extra <- (SCAB$y[1] - SCAB$y[2])/(SCAB$x[1] - SCAB$x[2]) *(x.extra - SCAB$x[1]) + SCAB$y[1] #max(SCAB$y) SCAB$x <- c(x.extra,SCAB$x) SCAB$y <- c(y.extra,SCAB$y) x.extra <- 14 y.extra <- (SCAB$y[length(SCAB$x)] - SCAB$y[length(SCAB$x)-1])/(SCAB$x[length(SCAB$x)] - SCAB$x[length(SCAB$x)-1]) *(x.extra - SCAB$x[length(SCAB$x)]) + SCAB$y[length(SCAB$x)] #min(SCAB$y) SCAB$x <- c(SCAB$x,x.extra) SCAB$y <- c(SCAB$y,y.extra) SCAB } SCAB <- get.SCAB(t(log2(exp.setup[,2:15]))[ordering,],exprs(eset.nobg)[is.spikein,]) get.SCAB2 <- function(slopes, exp.setup){ plot(slopes,exp.setup,ylab="concentration",xlab="gamma") lines(lowess(slopes,exp.setup)) SCAB <- lowess(slopes,exp.setup) x.extra <- -4 y.extra <- (SCAB$y[1] - SCAB$y[2])/(SCAB$x[1] - SCAB$x[2]) *(x.extra - SCAB$x[1]) + SCAB$y[1] SCAB$x <- c(x.extra,SCAB$x) SCAB$y <- c(y.extra,SCAB$y) x.extra <- 2 y.extra <- (SCAB$y[length(SCAB$x)] - SCAB$y[length(SCAB$x)-1])/(SCAB$x[length(SCAB$x)] - SCAB$x[length(SCAB$x)-1]) *(x.extra - SCAB$x[length(SCAB$x)]) + SCAB$y[length(SCAB$x)] SCAB$x <- c(SCAB$x,x.extra) SCAB$y <- c(SCAB$y,y.extra) SCAB } notinf <- as.vector(t(log2(exp.setup[,1+ordering]))!= -Inf) SCAB2 <- get.SCAB2(as.vector(slope[is.spikein,])[notinf],as.vector(t(log2(exp.setup[,2:15]))[ordering,])[notinf]) plot(seq(-3,0.75,by=0.05),approx(SCAB,xout=approx(SCAB2,xout=seq(-3,0.75,by=0.05))$y)$y,ylim=c(0,10), ylab="adjustment",xlab="gamma",main="Adjustment curve",type="l",lwd=2,col="red") bg.correct.SCAB <- function(expr, slope, SCAB, SCAB2){ expr - approx(SCAB,xout=approx(SCAB2,xout=slope)$y)$y } bg.corrected.SCAB <- matrix(0,12626,59) for (i in 1:59){ bg.corrected.SCAB[,i] <- bg.correct.SCAB(exprs(eset.nobg)[,i],slope[,i],SCAB,SCAB2) } rownames(bg.corrected.SCAB) <- rownames(exprs(eset.nobg)) colnames(bg.corrected.SCAB) <- colnames(exprs(eset.nobg)) eset.SCA <- eset.nobg exprs(eset.SCA) <- bg.corrected.SCAB ### ### plot the observed expression versus the spike-in concentration ### for the spike-ins ### plot.OvE.spikeins <- function(X,Y,...){ # X should be spike in conc # Y should be spike in expression max.axes <- max(15) min.axes <- min(-3) plot(c(0,0),c(0,0),type="n",xlim=c(min.axes,max.axes),ylim=c(min.axes,max.axes),xlab="Log2(Conc)",ylab="Observed Expression",...) for (i in 1:14){ not.infinite <- !(X[i,] == -Inf) points(X[i,not.infinite], Y[i,not.infinite],col=i,pch=i) } Y.notinf <- NULL X.notinf <- NULL for (i in 1:14){ not.infinite <- !is.infinite(X[i,]) Y.notinf <- c(Y.notinf,Y[i,not.infinite]) X.notinf <- c(X.notinf,X[i,not.infinite]) } fit <- summary(lm(as.vector(Y.notinf) ~ as.vector(X.notinf))) slope <- fit$coef[2,1] r.sq <- fit$r.sq r.sq text(max.axes-2,min.axes+0.5,paste("Slope Overall:",round(slope,3))) text(max.axes-2,min.axes,paste("R-Sq Overall:",round(r.sq,3))) fit2 <- summary(lm(as.vector(Y.notinf[X.notinf <= 2]) ~ as.vector(X.notinf[X.notinf <=2]))) slope <- fit2$coef[2,1] r.sq <- fit2$r.sq text(min.axes+2,max.axes,paste("Slope Low End:",round(slope,3))) text(min.axes+2,max.axes-0.5,paste("R-Sq Low End:",round(r.sq,3))) fit3 <- summary(lm(as.vector(Y.notinf[X.notinf >=2 & X.notinf <= 8]) ~ as.vector(X.notinf[X.notinf >=2 & X.notinf <= 8]))) slope <- fit3$coef[2,1] r.sq <- fit3$r.sq print(fit3) text(min.axes+2,min.axes+0.5,paste("Slope Middle Range:",round(slope,3))) text(min.axes+2,min.axes,paste("R-Sq Middle Range:",round(r.sq,3))) fit4 <- summary(lm(as.vector(Y.notinf[X.notinf >=7]) ~ as.vector(X.notinf[X.notinf >=7]))) slope <- fit4$coef[2,1] r.sq <- fit4$r.sq text(max.axes-2,max.axes,paste("Slope high End:",round(slope,3))) text(max.axes-2,max.axes-0.5,paste("R-Sq high End:",round(r.sq,3))) } plot.OvE.spikeins(t(log2(exp.setup[,2:15]))[ordering,],exprs(eset.nobg)[is.spikein,],main="No background") plot.OvE.spikeins(t(log2(exp.setup[,2:15]))[ordering,],exprs(eset.rma)[is.spikein,],main="RMA convolution background") plot.OvE.spikeins(t(log2(exp.setup[,2:15]))[ordering,],exprs(eset.mas5)[is.spikein,],main="MAS 5 background") plot.OvE.spikeins(t(log2(exp.setup[,2:15]))[ordering,],exprs(eset.imm)[is.spikein,],main="Ideal Mismatch") plot.OvE.spikeins(t(log2(exp.setup[,2:15]))[ordering,],exprs(eset.mas5imm)[is.spikein,],main="Mas5/Ideal Mismatch") plot.OvE.spikeins(t(log2(exp.setup[,2:15]))[ordering,],exprs(eset.SCA)[is.spikein,],main="Standard Curve Adjustment") ### ### A function for plotting Observed Fold-change versus Expected Fold-change ### plot.spikeins <- function(X,Y,...){ max.axes <- max(12.5) min.axes <- min(-12.5,-12.5) plot(c(0,0),c(0,0),type="n",xlim=c(min.axes,max.axes),ylim=c(min.axes,max.axes),xlab="Truth",ylab="Observed",...) for (i in 1:14){ not.infinite <- !is.infinite(X[i,]) points(X[i,not.infinite], Y[i,not.infinite],col=i,pch=i) } Y.notinf <- NULL X.notinf <- NULL for (i in 1:14){ not.infinite <- !is.infinite(X[i,]) Y.notinf <- c(Y.notinf,Y[i,not.infinite]) X.notinf <- c(X.notinf,X[i,not.infinite]) } fit <- summary(lm(as.vector(Y.notinf) ~ as.vector(X.notinf))) slope <- fit$coef[2,1] r.sq <- fit$r.sq text(max.axes-1,min.axes+0.5,paste("Slope :",round(slope,3))) text(max.axes-1,min.axes,paste("R-Sq :",round(r.sq,3))) } find.ratios.fast <- function(data, groups){ grp.means <- matrix(0,dim(data)[1],14) grp.means[,1] <- apply(data[,c(1,21,41)],1,mean) grp.means[,2] <- apply(data[,c(2,22,42)],1,mean) grp.means[,3] <- apply(data[,c(3,23)],1,mean) grp.means[,4] <- apply(data[,c(4,24,43)],1,mean) grp.means[,5] <- apply(data[,c(5,25,44)],1,mean) grp.means[,6] <- apply(data[,c(6,26,45)],1,mean) grp.means[,7] <- apply(data[,c(7,27,46)],1,mean) grp.means[,8] <- apply(data[,c(8,28,47)],1,mean) grp.means[,9] <- apply(data[,c(9,29,48)],1,mean) grp.means[,10] <- apply(data[,c(10,30,49)],1,mean) grp.means[,11] <- apply(data[,c(11,31,50)],1,mean) grp.means[,12] <- apply(data[,c(12,32,51)],1,mean) grp.means[,13] <- apply(data[,c(13,14,15,16,33,34,35,36,52,53,54,55)],1,mean) grp.means[,14] <- apply(data[,c(17,18,19,20,37,38,39,40,56,57,58,59)],1,mean) results <- matrix(0,dim(data)[1],91) ind <- 0 for (i in 1:13){ for (j in (i+1):14){ ind <- ind +1 results[,ind] <- grp.means[,i] - grp.means[,j] } } results } groups <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,13,13,13,14,14,14,14, 1,2,3,4,5,6,7,8,9,10,11,12,13,13,13,13,14,14,14,14, 1,2,4,5,6,7,8,9,10,11,12,13,13,13,13,14,14,14,14) ### ### Compute expected and observed fold changes. ### ratios.spikein <- find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] ratios.nobg <- find.ratios.fast(exprs(eset.nobg),groups) ratios.rma <- find.ratios.fast(exprs(eset.rma),groups) ratios.mas5 <- find.ratios.fast(exprs(eset.mas5),groups) ratios.imm <- find.ratios.fast(exprs(eset.imm),groups) ratios.mas5imm <- find.ratios.fast(exprs(eset.mas5imm),groups) ratios.SCA <- find.ratios.fast(exprs(eset.SCA),groups) ### ### Now Plot Observed vs Expected fold changes. ### plot.spikeins(ratios.spikein,ratios.nobg[is.spikein,],main="No background");abline(0,1) plot.spikeins(ratios.spikein,ratios.rma[is.spikein,],main="RMA convolution background");abline(0,1) plot.spikeins(ratios.spikein,ratios.mas5[is.spikein,],main="MAS 5 background");abline(0,1) plot.spikeins(ratios.spikein,ratios.imm[is.spikein,],main="Ideal Mismatch");abline(0,1) plot.spikeins(ratios.spikein,ratios.mas5imm[is.spikein,],main="Mas5/Ideal Mismatch");abline(0,1) plot.spikeins(ratios.spikein,ratios.SCA[is.spikein,],main="Standard Curve Adjustment");abline(0,1) ### ### Composite M vs A plots ### palette <- rainbow(13) palette.matrix <- matrix("#000000",14,91) palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == -1] <- palette[1] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == 1] <- palette[1] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == -2] <- palette[2] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == 2] <- palette[2] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == -3] <- palette[3] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == 3] <- palette[3] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == -4] <- palette[4] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == 4] <- palette[4] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == -5] <- palette[5] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == 5] <- palette[5] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == -6] <- palette[6] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == 6] <- palette[6] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == -7] <- palette[7] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == 7] <- palette[7] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == -8] <- palette[8] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == 8] <- palette[8] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == -9] <- palette[9] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == 9] <- palette[9] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == -10] <- palette[10] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == 10] <- palette[10] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == -11] <- palette[11] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == 11] <- palette[11] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == -12] <- palette[12] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == 12] <- palette[12] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == -Inf] <- palette[13] palette.matrix[find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,] == Inf] <- palette[13] find.averages.fast <- function(data, groups){ grp.means <- matrix(0,dim(data)[1],14) grp.means[,1] <- apply(data[,c(1,21,41)],1,mean) grp.means[,2] <- apply(data[,c(2,22,42)],1,mean) grp.means[,3] <- apply(data[,c(3,23)],1,mean) grp.means[,4] <- apply(data[,c(4,24,43)],1,mean) grp.means[,5] <- apply(data[,c(5,25,44)],1,mean) grp.means[,6] <- apply(data[,c(6,26,45)],1,mean) grp.means[,7] <- apply(data[,c(7,27,46)],1,mean) grp.means[,8] <- apply(data[,c(8,28,47)],1,mean) grp.means[,9] <- apply(data[,c(9,29,48)],1,mean) grp.means[,10] <- apply(data[,c(10,30,49)],1,mean) grp.means[,11] <- apply(data[,c(11,31,50)],1,mean) grp.means[,12] <- apply(data[,c(12,32,51)],1,mean) grp.means[,13] <- apply(data[,c(13,14,15,16,33,34,35,36,52,53,54,55)],1,mean) grp.means[,14] <- apply(data[,c(17,18,19,20,37,38,39,40,56,57,58,59)],1,mean) results <- matrix(0,dim(data)[1],91) ind <- 0 for (i in 1:13){ for (j in (i+1):14){ ind <- ind +1 results[,ind] <- 1/2*(grp.means[,i] + grp.means[,j]) } } results } ### ### Compute the "A" terms for the M vs A plots ### averages.nobg <- find.averages.fast(exprs(eset.nobg),groups) averages.rma <- find.averages.fast(exprs(eset.rma),groups) averages.mas5 <- find.averages.fast(exprs(eset.mas5),groups) averages.imm <- find.averages.fast(exprs(eset.imm),groups) averages.mas5imm <- find.averages.fast(exprs(eset.mas5imm),groups) averages.SCA <- find.averages.fast(exprs(eset.SCA),groups) ### ### Now actually do the plotting ### plot(averages.nobg[not.spikein],ratios.nobg[not.spikein],pch=".",ylim=c(-12,12),xlim=c(-2,15),xlab="A",ylab="M",main="No background") text(averages.nobg[is.spikein],ratios.nobg[is.spikein],as.character(find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,]),col=palette.matrix) plot(averages.rma[not.spikein],ratios.rma[not.spikein],pch=".",ylim=c(-12,12),xlim=c(-2,15),xlab="A",ylab="M",main="RMA convolution background") text(averages.rma[is.spikein],ratios.rma[is.spikein],as.character(find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,]),col=palette.matrix) plot(averages.mas5[not.spikein],ratios.mas5[not.spikein],pch=".",ylim=c(-12,12),xlim=c(-2,15),xlab="A",ylab="M",main="MAS 5 background") text(averages.mas5[is.spikein],ratios.mas5[is.spikein],as.character(find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,]),col=palette.matrix) plot(averages.imm[not.spikein],ratios.imm[not.spikein],pch=".",ylim=c(-12,12),xlim=c(-2,15),xlab="A",ylab="M",main="Ideal Mismatch") text(averages.imm[is.spikein],ratios.imm[is.spikein],as.character(find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,]),col=palette.matrix) plot(averages.mas5imm[not.spikein],ratios.mas5imm[not.spikein],pch=".",ylim=c(-12,12),xlim=c(-2,15),xlab="A",ylab="M",main="Mas5/Ideal Mismatch") text(averages.mas5imm[is.spikein],ratios.mas5imm[is.spikein],as.character(find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,]),col=palette.matrix) plot(averages.SCA[not.spikein],ratios.SCA[not.spikein],pch=".",ylim=c(-12,12),xlim=c(-2,15),xlab="A",ylab="M",main="Standard Curve Adjustment") text(averages.SCA[is.spikein],ratios.SCA[is.spikein],as.character(find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,]),col=palette.matrix) ### ### ROC curve using Fold change ### compute.ROC <- function(Nonspikeins, Spikeins){ #we will use observed foldchange as test statistic #FalsePositive is a non spikein called differential #TruePositive is a spike called differential #Want in the end to plot TruePositive vs False Positive FP <- c(0,,0.001,0.0025,0.005,0.0075,0.01,0.02,0.03,0.04, 0.05,0.10,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1) ROC.columns <- function(Nonspike,spike,FP){ cutoffs <- quantile(sort(-abs(Nonspike)),probs=FP) TP <- rep(0,length(FP)) for (i in 1:length(FP)){ TP[i] <-sum(abs(spike) > abs(cutoffs[i]))/length(spike) } TP } ALL.TP <- matrix(0,length(FP),91) for (i in 1:91){ ALL.TP[,i] <- ROC.columns(Nonspikeins[,i],Spikeins[,i],FP) } matrix(c(FP,apply(ALL.TP,1,mean)),length(FP),2) } my.palette <- rainbow(6) plot(compute.ROC(ratios.nobg[not.spikein,],ratios.nobg[is.spikein,]),type='l',col=my.palette[1],xlab="False Positives",ylab="True Positives",lwd=3,xlim=c(0,0.1),ylim=c(0.5,1), main="ROC curve based on fold-change") lines(compute.ROC(ratios.rma[not.spikein,],ratios.rma[is.spikein,]),type='l',col=my.palette[2],lwd=3,lty=1) lines(compute.ROC(ratios.mas5[not.spikein,],ratios.mas5[is.spikein,]),type='l',col=my.palette[3],lwd=3,lty=1) lines(compute.ROC(ratios.imm[not.spikein,],ratios.imm[is.spikein,]),type='l',col=my.palette[4],lwd=3,lty=1) lines(compute.ROC(ratios.mas5imm[not.spikein,],ratios.mas5imm[is.spikein,]),type='l',col=my.palette[5],lwd=3,lty=1) lines(compute.ROC(ratios.SCA[not.spikein,],ratios.SCA[is.spikein,]),type='l',col=my.palette[5],lwd=3,lty=1) legend(0.06,0.75,lwd=3,lty=c(rep(1,6)),legend=c("No background","RMA","MAS 5","Ideal Mismatch", "Mas5/Ideal Mismatch","Standard Curve Adjustment"), col=c(my.palette)) ################################################################################################## ### ### Now the normalization analysis code ### ################################################################################################## ### ### A quick look at raw intensities shows that a great deal of normalization is ### not really required ### boxplot(data.frame(log2(pm(Data)))) ### ### Calculate expression values ### eset.nonorm <- threestep(Data,background=FALSE,normalize=FALSE) eset.quantiles <- threestep(Data,background=FALSE) eset.scaling <- threestep(Data,background=FALSE,normalize.method="scaling") ### ### boxplot the expression estimates ### boxplot(data.frame(exprs(eset.nonorm)),names=1:59,main="No Normalization") boxplot(data.frame(exprs(eset.quantiles)),names=1:59, main="Quantiles") boxplot(data.frame(exprs(eset.scaling)),names=1:59, main="scaling") ### ### Fold change estimates ### ratios.nonorm <- find.ratios.fast(exprs(eset.nonorm),groups) ratios.quantiles <- find.ratios.fast(exprs(eset.quantiles),groups) ratios.scaling <- find.ratios.fast(exprs(eset.scaling),groups) ### ### Boxplot fold change of non spike-ins. ### boxplot(ratios.nonorm[not.spikein,],ratios.quantiles[not.spikein,],ratios.scaling[not.spikein,],ylim=c(-0.25,0.25),names=c("none","scaling","quantile"),col=c("red","blue","green"),main="Fold-Change of non-spike-ins") abline(0,0) ### ### Break into 20% quantiles ### vars.nonorm <- apply(exprs(eset.nonorm),1,var) vars.quantile <- apply(exprs(eset.quantiles),1,var) vars.scaling <- apply(exprs(eset.scaling),1,var) average.nonorm <- apply(exprs(eset.nonorm),1,mean) average.quantile <- apply(exprs(eset.quantiles),1,mean) average.scaling <- apply(exprs(eset.scaling),1,mean) var.boxplot <- function(averages,vars,...){ N.20 <- averages <= quantile(averages,probs=0.2) N.40 <- averages <= quantile(averages,probs=0.4) & averages > quantile(averages,probs=0.2) N.60 <- averages <= quantile(averages,probs=0.6) & averages > quantile(averages,probs=0.4) N.80 <- averages <= quantile(averages,probs=0.8) & averages > quantile(averages,probs=0.6) N.100 <- averages <= quantile(averages,probs=1) & averages > quantile(averages,probs=0.8) boxplot(vars[N.20],vars[N.40],vars[N.60],vars[N.80],vars[N.100],names=c("20%","40%","60%","80%","100%"),xlab="quantile",ylab="variance",...) } par(mfrow=c(1,3)) var.boxplot(average.nonorm[not.spikein],vars.nonorm[not.spikein],ylim=c(0,0.05),main="none",col="red") var.boxplot(average.quantile[not.spikein],vars.quantile[not.spikein],ylim=c(0,0.05),main="quantile",col="blue") var.boxplot(average.scaling[not.spikein],vars.scaling[not.spikein],ylim=c(0,0.05),main="scaling",col="green") ### ### Compute the "A" terms for the M vs A plots ### averages.nonorm <- find.averages.fast(exprs(eset.nonorm),groups) averages.quantile <- find.averages.fast(exprs(eset.quantiles),groups) averages.scaling <- find.averages.fast(exprs(eset.scaling),groups) plot(averages.nonorm[not.spikein],ratios.nonorm[not.spikein],pch=".",ylim=c(-12,12),xlim=c(-2,15),xlab="A",ylab="M",main="No background") text(averages.nonorm[is.spikein],ratios.nonorm[is.spikein],as.character(find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,]),col=palette.matrix) plot(averages.quantile[not.spikein],ratios.quantiles[not.spikein],pch=".",ylim=c(-12,12),xlim=c(-2,15),xlab="A",ylab="M",main="No background") text(averages.quantile[is.spikein],ratios.quantiles[is.spikein],as.character(find.ratios.fast(t(log2(exp.setup[,2:15])),groups)[ordering,]),col=palette.matrix) plot.spikeins(ratios.spikein,ratios.nonorm[is.spikein,]) plot.spikeins(ratios.spikein,ratios.quantiles[is.spikein,]) plot.spikeins(ratios.spikein,ratios.scaling[is.spikein,]) ### ### ROC curve comparing normalization methods ### my.palette <- c("red","green","blue") plot(compute.ROC(ratios.nonorm[not.spikein,],ratios.nonorm[is.spikein,]),type='l',col=my.palette[1],xlab="False Positives",ylab="True Positives",lwd=3,xlim=c(0,0.1),ylim=c(0.5,1), main="ROC curve based on fold-change") lines(compute.ROC(ratios.quantiles[not.spikein,],ratios.quantiles[is.spikein,]),type='l',col=my.palette[2],lwd=3,lty=1) lines(compute.ROC(ratios.scaling[not.spikein,],ratios.scaling[is.spikein,]),type='l',col=my.palette[3],lwd=3,lty=1) legend(0.06,0.75,lwd=3,lty=c(rep(1,6)),legend=c("No normalization","Quantile","Scaling"), col=c(my.palette)) plot.OvE.spikeins(t(log2(exp.setup[,2:15]))[ordering,],exprs(eset.nonorm)[is.spikein,],main="No normalization") plot.OvE.spikeins(t(log2(exp.setup[,2:15]))[ordering,],exprs(eset.quantiles)[is.spikein,],main="Quantile") plot.OvE.spikeins(t(log2(exp.setup[,2:15]))[ordering,],exprs(eset.scaling)[is.spikein,],main="Scaling")