############################################################################## ### ### ### file: AffyAnalysisCode.R ### ### Author: B. M. Bolstad ### ### Aim: Implement pre-processing analysis for Affymetrix microarray dataset ### ############################################################################# ### ### ### Use code is designed to work with the BioC 1.8 release set of packages ### on R-2.3.x. it may or may not work on later versions. ### ### ### ### Loading the affyPLM library will also load the affy library ### and other dependencies. ### library(affyPLM) ### ### This command assumes that all 121 cel files for the entire ### dataset are in the current working directory. This can be ### found using getwd() and set with setwd() (or the menu command if using ### R on windows. ### abatch.rawdata <- ReadAffy(filenames=list.celfiles()[97:121]) ### These correspond to the following CEL files. ### ### "GSM50108.CEL" "GSM50109.CEL" "GSM50110.CEL" "GSM50111.CEL" "GSM50112.CEL" ### "GSM50113.CEL" "GSM50114.CEL" "GSM50115.CEL" "GSM50116.CEL" "GSM50117.CEL" ### "GSM50118.CEL" "GSM50119.CEL" "GSM50120.CEL" "GSM50121.CEL" "GSM50122.CEL" ### "GSM50123.CEL" "GSM50124.CEL" "GSM50125.CEL" "GSM50126.CEL" "GSM50127.CEL" ### "GSM50128.CEL" "GSM50129.CEL" "GSM50130.CEL" "GSM50131.CEL" "GSM50132.CEL" ### ### cleanup the CEL file sample names by removing the .CEL sampleNames(abatch.rawdata) <- do.call("c",strsplit(sampleNames(abatch.rawdata),".CEL")) ### ### First some basic inspection of the raw PM intensities ### ### boxplot of log2 PM intensities by array boxplot(abatch.rawdata,col=rainbow(25),main="Unprocessed Intensities") ### density plots of log2 PM intensities by array hist(abatch.rawdata,col=rainbow(25),main="Unprocessed Intensities") ### comparing relative values of PM intensities by array Mbox(abatch.rawdata,range=0,ylim=c(-0.75,0.75),col=rainbow(25),main="Unprocessed Intensities") ### MA-plots for each array comparing to probe-wise median array par(mfrow=c(2,2)) MAplot(abatch.rawdata,plot.method="smoothScatter",ylim=c(-3,3),nrpoints=256) ### Compute RMA expression values eset.rma <- rma(abatch.rawdata) ### Boxplot of the RMA expression values shows normalization ### has removed many of the differences. par(mfrow=c(1,1),las=2) boxplot(eset.rma,col=rainbow(25),main="RMA Expression values") ### ### Now a quality analysis. ### ### this command requires a large amount of memory Pset <- fitPLM(abatch.rawdata) ### ### The NUSE and RLE statistics are useful for judging which ### arrays have lower quality data (and thus might be removed ### when subsequent down-stream analysis takes place). ### par(las=2) RLE(Pset,col=rainbow(25),main="RLE") RLE(Pset,type="density",col=rainbow(25),main="RLE") NUSE(Pset,col=rainbow(25), main="NUSE") NUSE(Pset,type="density",col=rainbow(25),main="NUSE") ### ### Looking at each chip for possible artifacts (or perhaps ### uniformly low weights). If running this code interactively ### it might be useful to to use par(ask=TRUE) first. ### for (i in 1:25){ par(mfrow=c(2,3)) image(Pset,which=i) image(Pset,which=i,type="resids") image(Pset,which=i,type="pos.resids") image(Pset,which=i,type="neg.resids") image(Pset,which=i,type="sign.resids") } ### ### MA-plots of expression values. ### par(mfrow=c(2,2)) MAplot(Pset,plot.method="smoothScatter",ylim=c(-3,3),nrpoints=256)