#M-x set-variable comint-scroll-to-bottom-on-input t
# find-matches.r
# this program helps to find peaks that should be aligned
# in a reference spectrum and a misaligned spectrum
# input: a reference.csv file with all data for the reference spectrum
# and misaligned.csv file with all data for the misaligned spectrum

rm(list=ls())

# a function used below
lookup <-  function(point,data=wvec){
  lookup <-  which(abs(data - point) == min(abs(data - point)))
  lookup[1]
}

peakinds = function(x){
  y = x
  ytrue = rep(FALSE,length(x))
  for(i in 2:(length(y)-1)){
    if(y[i-1] < y[i] && y[i] > y[i+1]) ytrue[i] = TRUE
  }
  ytrue
}



cor2peaks = function(testweight,refweight,corwidth){
  mid.test.pos = lookup(testweight,testdata[,1])
  bot.test.pos = lookup(testweight*(1-corwidth),testdata[,1])
  top.test.pos = lookup(testweight*(1+corwidth),testdata[,1])
  mid.ref.pos = lookup(refweight,refdata[,1])
  bot.ref.pos = lookup(refweight*(1-corwidth),refdata[,1])
  top.ref.pos = lookup(refweight*(1+corwidth),refdata[,1])
  mid2bot = min((mid.test.pos-bot.test.pos),(mid.ref.pos-bot.ref.pos))
  mid2top = min((top.test.pos-mid.test.pos),(top.ref.pos-mid.ref.pos))
 cor(testdata[(mid.test.pos-mid2bot):(mid.test.pos+mid2top),2],
     refdata[(mid.ref.pos-mid2bot):(mid.ref.pos+mid2top),2])
}


findbestpeak = function(t.botrange,t.toprange,corwidth,sidelength.percent,plotme=T){
  test.peak.info = peakinds(testdata[,2])

  t.botpos = lookup(t.botrange,testdata[,1])
  t.toppos = lookup(t.toprange,testdata[,1])

  t.peaks.range =
  testdata[t.botpos:t.toppos,][test.peak.info[t.botpos:t.toppos]==T,]

  bigpeakpos = which(t.peaks.range[,2] == max(t.peaks.range[,2]))[1]

  bigpeakweight = t.peaks.range[bigpeakpos,1]
  
# Given the reference spectrum and a peak in a test (i.e. misaligned)
# spectrum, find the peak in the reference spectrum that seems the
# best match, based on correlation of the two spectra.

  testweight = bigpeakweight
  sidelength = sidelength.percent*testweight


  peak.info = peakinds(refdata[,2])
  
  botweight = testweight-sidelength
  botpos = lookup(refdata[,1],botweight)
  topweight = testweight+sidelength
  toppos = lookup(refdata[,1],topweight)

  nearpeaks = refdata[botpos:toppos,][peak.info[botpos:toppos]==T,]
  
  corrs = rep(0,dim(nearpeaks)[1])

  if (length(corrs) > 0){
    for(i in 1:length(corrs)){
      corrs[i] = cor2peaks(testweight,nearpeaks[i,1],corwidth)
    }

    corrorder = order(corrs,decreasing=T)

    bestmatch = which(corrs == max(corrs))[1]

    printlength = min(3,length(corrorder))

    if(plotme) {plot.match(testweight,corwidth,nearpeaks,bestmatch,corrorder,corrs)}

    returnme = data.frame(targetweight = testweight, nearpeaks =
      nearpeaks[corrorder[1:printlength],1], Intensity =
      nearpeaks[corrorder[1:printlength],2], Correlations =
      corrs[corrorder[1:printlength]]) }

  else {
    
    returnme = data.frame(targetweight = testweight, nearpeaks = NA,
      Intensity = NA, Correlations = NA)}

  returnme
}


plot.match <- function(testweight,pct.window,nearpeaks,bestmatch,corrorder,corrs){
  r.wvec = refdata[,1]
  t.wvec = testdata[,1]


  r.position = lookup(testweight,data=r.wvec)
  r.bot = lookup(testweight*(1-pct.window),data=r.wvec)
  r.top = lookup(testweight*(1+pct.window),data=r.wvec)

  t.position = lookup(testweight,data=t.wvec)
  t.bot = lookup(testweight*(1-pct.window),data=t.wvec)
  t.top = lookup(testweight*(1+pct.window),data=t.wvec)


  position.r.max = r.bot-1+which(refdata[r.bot:r.top,2]==
    max(refdata[r.bot:r.top,2]))
  r.max = refdata[position.r.max,2]

  position.t.max = t.bot-1+which(testdata[t.bot:t.top,2]==
    max(testdata[t.bot:t.top,2]))
  t.max = testdata[position.t.max,2]


  plot(r.wvec[r.bot:r.top],refdata[r.bot:r.top,2],type="l",ylim =
  c(0,1.2*max(t.max,r.max)),xlab="M/Z weight",ylab="Intensity")
  lines(t.wvec[t.bot:t.top],testdata[t.bot:t.top,2],type="l",col=2)

  abline(v=nearpeaks[bestmatch,1],lty=2,col=1)
  abline(v=testweight,lty=2,col=2)

  legend.y = 1.2*max(t.max,r.max)
  legend.x = r.wvec[r.bot]
  legend.x2 = .45*r.wvec[r.bot]+.55*r.wvec[r.top]
  legend(legend.x,legend.y,
       legend=c(paste("Reference, peak at",round(nearpeaks[corrorder[1],1],1)),
         paste("Test, peak at",round(testweight,1))),
       lty = c(1,1),col=c(1,2))
  
  legend(legend.x2,legend.y,legend=c(paste("1st Corr =",
    round(corrs[corrorder[1]],2), ", M/Z = ",
    round(nearpeaks[corrorder[1],1],1)),
    paste("2nd Corr =",round(corrs[corrorder[2]],2), ", M/Z = ",
    round(nearpeaks[corrorder[2],1],1)),
    paste("3rd Corr =",round(corrs[corrorder[3]],2), ", M/Z = ",
    round(nearpeaks[corrorder[3],1],1))),bty="n")
}


# read in the two files
filename = "test.csv"
testdata = read.csv(file=filename, header = TRUE, sep = ",", quote="\"", dec=".", fill = TRUE)

reffilename = "reference.csv"
refdata = read.csv(file=reffilename, header = TRUE, sep = ",", quote="\"", dec=".", fill = TRUE)



corwidth = .05;
swidth = .02 #sidelength.percent = .02


peakpairs = NULL

# set graphics page to show 3 graphs at a time
par(mfrow=c(3,1))

temppeaks = findbestpeak(2000,4000,corwidth = corwidth,sidelength.percent = swidth)
temppeaks
peakpairs = rbind(peakpairs,temppeaks[1,])

temppeaks = findbestpeak(4000,7000,corwidth = corwidth,sidelength.percent = swidth)
temppeaks
peakpairs = rbind(peakpairs,temppeaks[1,])


temppeaks = findbestpeak(7000,10000,corwidth = corwidth,sidelength.percent = swidth)
temppeaks
peakpairs = rbind(peakpairs,temppeaks[1,])


temppeaks = findbestpeak(10000,13000,corwidth = corwidth,sidelength.percent = swidth)
temppeaks
peakpairs = rbind(peakpairs,temppeaks[1,])

temppeaks = findbestpeak(13000,20000,corwidth = corwidth,sidelength.percent = swidth)
temppeaks
peakpairs = rbind(peakpairs,temppeaks[1,])


par(mfrow=c(1,1))

# check the pairs you have 
peakpairs
pctdiffs = (peakpairs[,2]-peakpairs[,1])/peakpairs[,1]
# after you are happy with the pairs you have (at least 4 for the cubic
# spline and 3 for the quadratic) the information can be written
# into a file
write.table(file="peaks.csv",data.frame(Reference = peakpairs[,2],Misaligned = peakpairs[,1],Correlation=peakpairs[,4],PctDiff=pctdiffs),row.names=F,col.names=T,sep=",",quote=F)


q("no")