# Lab 0 Sample S Code # take a random sample of size 100 from N(25,4) dist > n1 <- rnorm(100,25,4) # 96% CI limits assuming we know the variance > mean(n1) + qnorm(0.98)*4/sqrt(100) [1] 26.17857 > mean(n1) - qnorm(0.98)*4/sqrt(100) [1] 24.53557 # now lets assume for a moment we do not know the population variance # the 96% ci would then be > mean(n1) + qt(0.98,99)*sqrt(var(n1))/sqrt(100) [1] 26.13879 > mean(n1) - qt(0.98,99)*sqrt(var(n1))/sqrt(100) [1] 24.57536 # now lets carry out the hypothesis tests > Zstat <- (mean(n1) - 25)/(4/sqrt(100)) > 2*(1-pnorm(abs(Zstat))) [1] 0.3720269 # assume for a moment that we do not know the variance > tstat <- (mean(n1) - 25)/(sqrt(var(n1))/sqrt(100)) > 2*(1-pt(abs(tstat),99)) [1] 0.3441034 # generate 500 samples of size 100 > nsamps <- matrix(rnorm(500*100,25,4),100,500) > sampmeans <- apply(nsamps,2,mean) > sampsds <- sqrt(apply(nsamps,2,var)) # calculate the intervals > ul <- sampmeans + qt(0.975,99)*sampsds/sqrt(100) > ll <- sampmeans - qt(0.975,99)*sampsds/sqrt(100) # now work out how many fail to cover the true mean > below <- 25 < ll > above <- 25 > ul > sum(below + above) [1] 20 > 20/500 [1] 0.04 # generate a chi-sq variable > norm5 <- rnorm(5) > sum(norm5^2) [1] 11.10340 # let generate 500 samples > normforchi <- matrix(rnorm(500*5),5,500) # a little function to sum the squares of the elements in a matrix >sumsqcol <- function(x){ + sum(x^2) + } # generate the sample > chisqsample <- apply(normforchi,2,sumsqcol) # plot it and superimpose the chisq(5) pdf upon it > hist(chisqsample,probability=T) > lines(0:250/10,dchisq(0:250/10,5)) #compare the cdfs > cdf.compare(chisqsample,dist="chisquare",df=5) # now lets make out t-distribution variables > norm500 <- rnorm(500) > tdist <- norm500/sqrt(chisqsample/5) # compare the pdfs > hist(tdist,probability=T) > lines(-50:60/10,dt(-50:60/10,5)) #compare the cdfs > cdf.compare(tdist,dist="t",df=5) # lets generate some F_3,2 variables # first lets make the two chisq variables > norm3 <- matrix(rnorm(3*500),3,500) > chisq3 <- apply(norm3,2,sumsqcol) > norm2 <- matrix(rnorm(2*500),2,500) > chisq2 <- apply(norm2,2,sumsqcol) > Fvalues <- (chisq3/3)/(chisq2/2) # compare the pdfs > hist(Fvalues,probability=T) > hist(rf(500,3,2),probability=T) #compare the cdf graphically > cdf.compare(Fvalues,dist="f",df1=3,df2=3)