#------------------------------------------------------------------------------- # Environment Declarations #------------------------------------------------------------------------------- rm(list=ls()) options(object.size=Inf) memory.size(max = TRUE) #------------------------------------------------------------------------------- # Function Declarations #------------------------------------------------------------------------------- source("http://bioconductor.org/biocLite.R") biocLite() biocLite("multtest") library(multtest) install.packages("polycor") library(polycor) install.packages("gplots") library(gplots) image.scale <- function (z, col, x, y = NULL, size = NULL, digits = 2, labels = c("breaks", "ranges")) { # sort out the location n <- length(col) usr <- par("usr") mx <- mean(usr[1:2]); my <- mean(usr[3:4]) dx <- diff(usr[1:2]); dy <- diff(usr[3:4]) if (missing(x)) x <- mx + 1.05*dx/2 # default x to right of image else if (is.list(x)) { if (length(x$x) == 2) size <- c(diff(x$x), -diff(x$y)/n) y <- x$y[1] x <- x$x[1] } else x <- x[1] if (is.null(size)) if (is.null(y)) { size <- 0.618*dy/n # default size, golden ratio y <- my + 0.618*dy/2 # default y to give centred scale } else size <- (y-my)*2/n if (length(size)==1) size <- rep(size, 2) # default square boxes if (is.null(y)) y <- my + n*size[2]/2 # draw the image scale i <- seq(along = col) rect(x, y - i * size[2], x + size[1], y - (i - 1) * size[2], col = rev(col), xpd = TRUE) # sort out the labels rng <- range(z, na.rm = TRUE) bks <- seq(from = rng[2], to = rng[1], length = n + 1) bks <- formatC(bks, format="f", digits=digits) labels <- match.arg(labels) if (labels == "breaks") ypts <- y - c(0, i) * size[2] else { bks <- paste(bks[-1], bks[-(n+1)], sep = " - ") ypts <- y - (i - 0.5) * size[2] } text(x = x + 1.2 * size[1], y = ypts, labels = bks, adj = ifelse(size[1]>0, 0, 1), xpd = TRUE) } #------------------------------------------------------------------------------- # Import Data #------------------------------------------------------------------------------- working.data <- read.table("RawData.txt",row.names=1,header=TRUE) working.data <- working.data[,c(5,6,2,3,4,1)] working.data[1:5,] col.2.use <- c("green","green","green","pink","pink","pink") #------------------------------------------------------------------------------- # Generate Box Plot #------------------------------------------------------------------------------- WorkingList <- vector("list", dim(working.data)[2]) for ( i in 1:dim(working.data)[2]) { WorkingList[[i]] <- as.numeric(working.data[,i]) } boxplot(WorkingList,col=col.2.use) #------------------------------------------------------------------------------- # Generate PCA #------------------------------------------------------------------------------- pca.results <- princomp(working.data,cor=F) pca.loadings <- loadings(pca.results) totalvar <- (pca.results$sdev^2) variancePer <- round(totalvar/sum(totalvar)*100,1) variancePer <- variancePer[1:3] x.range <- range(pca.loadings[,1]) y.range <- range(pca.loadings[,2]) xlab <- paste(c("Principal Component 1 (",variancePer[1],"%)"),collapse="") ylab <- paste(c("Principal Component 2 (",variancePer[2],"%)"),collapse="") plot(pca.loadings[,1],pca.loadings[,2],type='n',xlab=xlab,ylab=ylab) points(pca.loadings[,1],pca.loadings[,2],lwd=2,col=col.2.use,cex=3,pch=19) points(pca.loadings[,1],pca.loadings[,2],lwd=2,col=1,cex=3,pch=21) #------------------------------------------------------------------------------- # Generate Heat Map #------------------------------------------------------------------------------- cdat <- working.data cdat <- cor(cdat) par("mar" = c(5, 4, 4, 10) + 0.1) # wide righthand margin greys <- rgb(r=(3:0)/3, g=(3:0)/3, b=(3:0)/3, names=paste("greys",3:0,sep=".")) image(cdat,col=greys) image.scale(as.numeric(cdat),col=greys,digits = 3) box() #------------------------------------------------------------------------------- # Noise Analysis #------------------------------------------------------------------------------- group1 <- working.data[,c(1:3)] group2 <- working.data[,c(4:6)] g1.mean <- apply(group1,1,mean) g2.mean <- apply(group2,1,mean) g1.sd <- apply(group1,1,sd) g2.sd <- apply(group2,1,sd) g1.cv <- (g1.sd/g1.mean)*100 g2.cv <- (g2.sd/g2.mean)*100 lowess.g1 <- lowess(g1.cv~g1.mean) lowess.g2 <- lowess(g2.cv~g2.mean) xrange <- range(c(2:14)) yrange <- range(c(0:20)) plot(xrange,yrange,type='n',xlab="Mean Expression",ylab="C.V.") points(g2.mean,g2.cv,type='p',col=8) lines(lowess.g1,col=2,lwd=2) lines(lowess.g2,col=7,lwd=2) abline(v=6) #------------------------------------------------------------------------------- # Remove Noise #------------------------------------------------------------------------------- noise.pick <- 6 temp <- as.numeric(unlist(working.data)) temp <- ifelse(temp=1.25,1,0) coded.pass <- coded.fc+coded.pv coded.keepers <- names(coded.pass[coded.pass==2]) grab.keeper.info <- running.cx[coded.keepers,] nochange <- dimnames(Final.working.data[setdiff(dimnames(Final.working.data)[[1]],coded.keepers),])[[1]] up <- dimnames(grab.keeper.info[(grab.keeper.info[,3]>0),])[[1]] down <- dimnames(grab.keeper.info[(grab.keeper.info[,3]<0),])[[1]] xrange <- range(c(-20:10)) yrange <- range(c(0,20)) plot(xrange,yrange,xlab="Fold Change",ylab="-log(uncorrected P-value)",type='n',main="Gene Selection") points(ifelse(as.numeric(running.cx[,3][nochange])<(-20),-20,as.numeric(running.cx[,3][nochange])),-log(as.numeric(running.cx[,5][nochange])),col=8,cex=1.5,pch=1) points(ifelse(as.numeric(running.cx[,3][up])>20,20,as.numeric(running.cx[,3][up])),-log(as.numeric(running.cx[,5][up])),col=1,cex=1.5,pch=2) points(ifelse(as.numeric(running.cx[,3][down])<(-20),-20,as.numeric(running.cx[,3][down])),-log(as.numeric(running.cx[,5][down])),col=1,cex=1.5,pch=6) abline(v=-1) abline(v=1) abline(h=-log(0.05),lty=2) abline(v=-1.25,lty=2) abline(v=1.25,lty=2) temp <- legend("topright", legend = c(" ", " ", " "),text.width=strwidth("1,000,000,000"),pch=c(2,1,6),xjust=1,yjust=1,title="Gene Difference",col=c(1,8,1)) a <- paste(c("Up (n=",length(up),")"),collapse="") b <- paste(c("None (n=",length(nochange),")"),collapse="") c <- paste(c("Down (n=",length(down),")"),collapse="") text(temp$rect$left + temp$rect$w, temp$text$y,c(a,b,c), pos = 2) #------------------------------------------------------------------------------- # Repeat Visualizations Post FDR MCC #------------------------------------------------------------------------------- par(mfrow=c(1,2)) Final.working.data <- Final.working.data[coded.keepers,] pca.results <- princomp(Final.working.data,cor=F) pca.loadings <- loadings(pca.results) totalvar <- (pca.results$sdev^2) variancePer <- round(totalvar/sum(totalvar)*100,1) variancePer <- variancePer[1:3] x.range <- range(pca.loadings[,1]) y.range <- range(pca.loadings[,2]) xlab <- paste(c("Principal Component 1 (",variancePer[1],"%)"),collapse="") ylab <- paste(c("Principal Component 2 (",variancePer[2],"%)"),collapse="") plot(pca.loadings[,1],pca.loadings[,2],type='n',xlab=xlab,ylab=ylab) points.2.use <- as.numeric(unlist(sample.info$Shape)) points(pca.loadings[,1],pca.loadings[,2],lwd=2,col=col.2.use,cex=2,pch=19) points(pca.loadings[,1],pca.loadings[,2],lwd=2,col=1,cex=2,pch=21) x.range <- range(pca.loadings[,2]) y.range <- range(pca.loadings[,3]) xlab <- paste(c("Principal Component 2 (",variancePer[2],"%)"),collapse="") ylab <- paste(c("Principal Component 3 (",variancePer[3],"%)"),collapse="") plot(pca.loadings[,2],pca.loadings[,3],type='n',xlab=xlab,ylab=ylab) points.2.use <- as.numeric(unlist(sample.info$Shape)) points(pca.loadings[,2],pca.loadings[,3],lwd=2,col=col.2.use,cex=2,pch=19) points(pca.loadings[,2],pca.loadings[,3],lwd=2,col=1,cex=2,pch=21) cdat <- Final.working.data[coded.keepers,] cdat <- cor(cdat) par("mar" = c(5, 4, 4, 10) + 0.1) # wide righthand margin greys <- rgb(r=(3:0)/3, g=(3:0)/3, b=(3:0)/3, names=paste("greys",3:0,sep=".")) image(cdat,col=greys) image.scale(as.numeric(cdat),col=greys,digits = 3) box() #------------------------------------------------------------------------------- # Step 22: Generate clustered Heat Map #------------------------------------------------------------------------------- ccc <- Final.working.data[coded.keepers,] cc <- colorpanel(11,low="blue",mid="white",high="red") dimnames(ccc)[[2]] <- as.character(c(1:dim(ccc)[2])) heatmap.2(as.matrix(ccc),col=cc,trace="none",density.info="none",ColSideColors=c("green","green","green","pink","pink","pink")) #------------------------------------------------------------------------------- # Step 23: Output Results #------------------------------------------------------------------------------- write.table(running.cx[coded.keepers,],"Differential_Expressed_Gene_Fragments.txt",sep="\t") write.table(running.cx[nochange,],"Constituitive_Expressed_Gene_Fragments.txt",sep="\t")