# The parameters below should be set and comment # symbols removed. csize is size is cancer group in the # training set, nsize is size of control group in the training # set. ntrials is number of resamplings #rm(list=ls()) #csize = 81; nsize=46; #datadir = "c:/data/ciphergen/"; #ntrials = 50; #lookuppt = 0 setwd(datadir) # requires the pamr package from R or Bioconductor site library(pamr) outcomepreds = matrix(rep(0,4*ntrials),ncol=4) # if the process died midway through the resamplings this looks # to start where it left off. try(load(paste(datadir,"pamres-",as.character(lookuppt),"-",as.character(csize),"-",as.character(nsize),".rda",sep=""))) lookup <- function(point,data=wvec){ lookup <- which(abs(data - point) == min(abs(data - point))) lookup[1] } load(paste(datadir,"cancer.rda",sep="")) load(paste(datadir,"control.rda",sep="")) load(paste(datadir,"wvec.rda",sep="")) ttestp = function(y,labels,label1,label2){ ttestp = t.test(y[labels == label1], y[labels == label2])$p.value ttestp } avnrmlz = function(vec,avval){ avnrmlz = vec*avval/mean(vec) } leftpoint = lookup(lookuppt) if(wvec[leftpoint] < lookuppt) leftpoint = leftpoint + 1 startpoint = min(which(outcomepreds[,1] == 0)) for(z in startpoint:ntrials){ set.seed(z) dimcan = dim(cancer) dimcon = dim(control) listcancer1 = sample(dimcan[1],csize) listcontrol1 = sample(dimcon[1],nsize) cancer1 = cancer[listcancer1,leftpoint:dimcan[2]]; control1 = control[listcontrol1,leftpoint:dimcon[2]] lc1 = length(listcancer1); #lc2 = length(listcancer2); ln1 = length(listcontrol1);# ln2 = length(listcontrol2); N1 = lc1+ln1 rorder = sample(1:N1) backorder = (1:N1)[order(rorder)] cancer2 = cancer[-listcancer1,leftpoint:dimcan[2]]; control2 = control[-listcontrol1,leftpoint:dimcon[2]] lc2 = dim(cancer2)[1]; ln2 = dim(control2)[1] avval = mean(rbind(cancer1,control1)) bset1 = t(apply(rbind(cancer1,control1),1,avnrmlz,avval)) bset1 = bset1[rorder,] backorder = (1:N1)[order(rorder)] classlabels = c(rep("C",lc1),rep("N",ln1))[rorder] class01s = as.integer(ifelse(classlabels == "C",0,1)) bset2 = t(apply(rbind(cancer2,control2),1,avnrmlz,mean(rbind(cancer2,control2)))) classlabels2 = c(rep("C",lc2),rep("N",ln2)) dimbset1 = dim(bset1) ttestps = apply(bset1,2,ttestp,labels=classlabels,label1="C",label2="N") pboundary = .05/dim(bset1)[2] smallps = (which(ttestps < pboundary)) datax = bset1[,smallps]; groupclass = ifelse(classlabels == "C",0,1); data1 = list(x=t(datax),y=classlabels) train1 = pamr.train(data1) results1 = pamr.cv(train1,data1) thresh1 = min(results1$threshold[which(results1$error == min(results1$error))]) # pamr.plotcv(results1) # pamr.confusion(results1,threshold=thresh1) test = t(bset2[,smallps]) groupclass2 = ifelse(classlabels2 == "C",0,1) #pamr.predict(train1,test,threshold=thresh1, #type="class") tableboost = table(groupclass2,pamr.predict(train1,test,threshold=thresh1, type="class")) outcomepreds[z,] = c( (tableboost[1,1])/(tableboost[1,1]+tableboost[1,2]), (tableboost[1,1]+tableboost[1,2]), (tableboost[2,2])/(tableboost[2,2]+tableboost[2,1]), (tableboost[2,2]+tableboost[2,1])) print(date()) print(paste("cancer1 =",cancer1[1,1])) print(paste("z =",z)) print(outcomepreds[z,]) save(list=c("outcomepreds"),file=paste(datadir,"pamres-",as.character(lookuppt),"-",as.character(csize),"-",as.character(nsize),".rda",sep="")) }