# The parameters below should be set and comment
# symbols removed.  csize is size is cancer group in the
# training set, nsize is size of control group in the training
# set.  ntrials is number of resamplings

#rm(list=ls())
#csize = 81; nsize=46;
#datadir = "c:/data/ciphergen/";
#ntrials = 50;
#lookuppt = 0

setwd(datadir)

# requires the pamr package from R or Bioconductor site
library(pamr)


outcomepreds = matrix(rep(0,4*ntrials),ncol=4)

# if the process died midway through the resamplings this looks
# to start where it left off.
try(load(paste(datadir,"pamres-",as.character(lookuppt),"-",as.character(csize),"-",as.character(nsize),".rda",sep="")))

lookup <-  function(point,data=wvec){
  lookup <-  which(abs(data - point) == min(abs(data - point)))
  lookup[1]
}


load(paste(datadir,"cancer.rda",sep=""))
load(paste(datadir,"control.rda",sep=""))
load(paste(datadir,"wvec.rda",sep=""))


ttestp = function(y,labels,label1,label2){
  ttestp = t.test(y[labels == label1],
                     y[labels == label2])$p.value
  ttestp
}
avnrmlz = function(vec,avval){
  avnrmlz = vec*avval/mean(vec)
}


leftpoint = lookup(lookuppt)
if(wvec[leftpoint] < lookuppt) leftpoint = leftpoint + 1

startpoint = min(which(outcomepreds[,1] == 0))

for(z in startpoint:ntrials){

set.seed(z)

dimcan = dim(cancer)
dimcon = dim(control)
listcancer1 =  sample(dimcan[1],csize)
listcontrol1 = sample(dimcon[1],nsize)


cancer1 = cancer[listcancer1,leftpoint:dimcan[2]];
control1 = control[listcontrol1,leftpoint:dimcon[2]]

lc1 = length(listcancer1); #lc2 = length(listcancer2);
ln1 = length(listcontrol1);# ln2 = length(listcontrol2);
N1 = lc1+ln1

rorder = sample(1:N1)
backorder = (1:N1)[order(rorder)]
cancer2 = cancer[-listcancer1,leftpoint:dimcan[2]];
control2 = control[-listcontrol1,leftpoint:dimcon[2]]
lc2 = dim(cancer2)[1]; ln2 = dim(control2)[1] 

avval = mean(rbind(cancer1,control1))

bset1 = t(apply(rbind(cancer1,control1),1,avnrmlz,avval))
bset1 = bset1[rorder,]
backorder = (1:N1)[order(rorder)]
classlabels = c(rep("C",lc1),rep("N",ln1))[rorder]
class01s = as.integer(ifelse(classlabels == "C",0,1))

bset2 = t(apply(rbind(cancer2,control2),1,avnrmlz,mean(rbind(cancer2,control2))))
classlabels2 = c(rep("C",lc2),rep("N",ln2))
dimbset1 = dim(bset1)


ttestps = apply(bset1,2,ttestp,labels=classlabels,label1="C",label2="N")

pboundary = .05/dim(bset1)[2]
 smallps = (which(ttestps < pboundary))

datax = bset1[,smallps];
groupclass = ifelse(classlabels == "C",0,1);


data1 = list(x=t(datax),y=classlabels)
train1 = pamr.train(data1)
results1 = pamr.cv(train1,data1)
thresh1 = min(results1$threshold[which(results1$error ==
  min(results1$error))])
  
# pamr.plotcv(results1)
# pamr.confusion(results1,threshold=thresh1)

test = t(bset2[,smallps])
groupclass2 = ifelse(classlabels2 == "C",0,1)

#pamr.predict(train1,test,threshold=thresh1,
#type="class")

tableboost = table(groupclass2,pamr.predict(train1,test,threshold=thresh1,
type="class"))


outcomepreds[z,] = c(
             (tableboost[1,1])/(tableboost[1,1]+tableboost[1,2]),
(tableboost[1,1]+tableboost[1,2]),
             (tableboost[2,2])/(tableboost[2,2]+tableboost[2,1]),
(tableboost[2,2]+tableboost[2,1]))

print(date())
print(paste("cancer1 =",cancer1[1,1]))
print(paste("z =",z))
print(outcomepreds[z,])
save(list=c("outcomepreds"),file=paste(datadir,"pamres-",as.character(lookuppt),"-",as.character(csize),"-",as.character(nsize),".rda",sep=""))
}