#M-x set-variable comint-scroll-to-bottom-on-input t # find-matches.r # this program helps to find peaks that should be aligned # in a reference spectrum and a misaligned spectrum # input: a reference.csv file with all data for the reference spectrum # and misaligned.csv file with all data for the misaligned spectrum rm(list=ls()) # a function used below lookup <- function(point,data=wvec){ lookup <- which(abs(data - point) == min(abs(data - point))) lookup[1] } peakinds = function(x){ y = x ytrue = rep(FALSE,length(x)) for(i in 2:(length(y)-1)){ if(y[i-1] < y[i] && y[i] > y[i+1]) ytrue[i] = TRUE } ytrue } cor2peaks = function(testweight,refweight,corwidth){ mid.test.pos = lookup(testweight,testdata[,1]) bot.test.pos = lookup(testweight*(1-corwidth),testdata[,1]) top.test.pos = lookup(testweight*(1+corwidth),testdata[,1]) mid.ref.pos = lookup(refweight,refdata[,1]) bot.ref.pos = lookup(refweight*(1-corwidth),refdata[,1]) top.ref.pos = lookup(refweight*(1+corwidth),refdata[,1]) mid2bot = min((mid.test.pos-bot.test.pos),(mid.ref.pos-bot.ref.pos)) mid2top = min((top.test.pos-mid.test.pos),(top.ref.pos-mid.ref.pos)) cor(testdata[(mid.test.pos-mid2bot):(mid.test.pos+mid2top),2], refdata[(mid.ref.pos-mid2bot):(mid.ref.pos+mid2top),2]) } findbestpeak = function(t.botrange,t.toprange,corwidth,sidelength.percent,plotme=T){ test.peak.info = peakinds(testdata[,2]) t.botpos = lookup(t.botrange,testdata[,1]) t.toppos = lookup(t.toprange,testdata[,1]) t.peaks.range = testdata[t.botpos:t.toppos,][test.peak.info[t.botpos:t.toppos]==T,] bigpeakpos = which(t.peaks.range[,2] == max(t.peaks.range[,2]))[1] bigpeakweight = t.peaks.range[bigpeakpos,1] # Given the reference spectrum and a peak in a test (i.e. misaligned) # spectrum, find the peak in the reference spectrum that seems the # best match, based on correlation of the two spectra. testweight = bigpeakweight sidelength = sidelength.percent*testweight peak.info = peakinds(refdata[,2]) botweight = testweight-sidelength botpos = lookup(refdata[,1],botweight) topweight = testweight+sidelength toppos = lookup(refdata[,1],topweight) nearpeaks = refdata[botpos:toppos,][peak.info[botpos:toppos]==T,] corrs = rep(0,dim(nearpeaks)[1]) if (length(corrs) > 0){ for(i in 1:length(corrs)){ corrs[i] = cor2peaks(testweight,nearpeaks[i,1],corwidth) } corrorder = order(corrs,decreasing=T) bestmatch = which(corrs == max(corrs))[1] printlength = min(3,length(corrorder)) if(plotme) {plot.match(testweight,corwidth,nearpeaks,bestmatch,corrorder,corrs)} returnme = data.frame(targetweight = testweight, nearpeaks = nearpeaks[corrorder[1:printlength],1], Intensity = nearpeaks[corrorder[1:printlength],2], Correlations = corrs[corrorder[1:printlength]]) } else { returnme = data.frame(targetweight = testweight, nearpeaks = NA, Intensity = NA, Correlations = NA)} returnme } plot.match <- function(testweight,pct.window,nearpeaks,bestmatch,corrorder,corrs){ r.wvec = refdata[,1] t.wvec = testdata[,1] r.position = lookup(testweight,data=r.wvec) r.bot = lookup(testweight*(1-pct.window),data=r.wvec) r.top = lookup(testweight*(1+pct.window),data=r.wvec) t.position = lookup(testweight,data=t.wvec) t.bot = lookup(testweight*(1-pct.window),data=t.wvec) t.top = lookup(testweight*(1+pct.window),data=t.wvec) position.r.max = r.bot-1+which(refdata[r.bot:r.top,2]== max(refdata[r.bot:r.top,2])) r.max = refdata[position.r.max,2] position.t.max = t.bot-1+which(testdata[t.bot:t.top,2]== max(testdata[t.bot:t.top,2])) t.max = testdata[position.t.max,2] plot(r.wvec[r.bot:r.top],refdata[r.bot:r.top,2],type="l",ylim = c(0,1.2*max(t.max,r.max)),xlab="M/Z weight",ylab="Intensity") lines(t.wvec[t.bot:t.top],testdata[t.bot:t.top,2],type="l",col=2) abline(v=nearpeaks[bestmatch,1],lty=2,col=1) abline(v=testweight,lty=2,col=2) legend.y = 1.2*max(t.max,r.max) legend.x = r.wvec[r.bot] legend.x2 = .45*r.wvec[r.bot]+.55*r.wvec[r.top] legend(legend.x,legend.y, legend=c(paste("Reference, peak at",round(nearpeaks[corrorder[1],1],1)), paste("Test, peak at",round(testweight,1))), lty = c(1,1),col=c(1,2)) legend(legend.x2,legend.y,legend=c(paste("1st Corr =", round(corrs[corrorder[1]],2), ", M/Z = ", round(nearpeaks[corrorder[1],1],1)), paste("2nd Corr =",round(corrs[corrorder[2]],2), ", M/Z = ", round(nearpeaks[corrorder[2],1],1)), paste("3rd Corr =",round(corrs[corrorder[3]],2), ", M/Z = ", round(nearpeaks[corrorder[3],1],1))),bty="n") } # read in the two files filename = "test.csv" testdata = read.csv(file=filename, header = TRUE, sep = ",", quote="\"", dec=".", fill = TRUE) reffilename = "reference.csv" refdata = read.csv(file=reffilename, header = TRUE, sep = ",", quote="\"", dec=".", fill = TRUE) corwidth = .05; swidth = .02 #sidelength.percent = .02 peakpairs = NULL # set graphics page to show 3 graphs at a time par(mfrow=c(3,1)) temppeaks = findbestpeak(2000,4000,corwidth = corwidth,sidelength.percent = swidth) temppeaks peakpairs = rbind(peakpairs,temppeaks[1,]) temppeaks = findbestpeak(4000,7000,corwidth = corwidth,sidelength.percent = swidth) temppeaks peakpairs = rbind(peakpairs,temppeaks[1,]) temppeaks = findbestpeak(7000,10000,corwidth = corwidth,sidelength.percent = swidth) temppeaks peakpairs = rbind(peakpairs,temppeaks[1,]) temppeaks = findbestpeak(10000,13000,corwidth = corwidth,sidelength.percent = swidth) temppeaks peakpairs = rbind(peakpairs,temppeaks[1,]) temppeaks = findbestpeak(13000,20000,corwidth = corwidth,sidelength.percent = swidth) temppeaks peakpairs = rbind(peakpairs,temppeaks[1,]) par(mfrow=c(1,1)) # check the pairs you have peakpairs pctdiffs = (peakpairs[,2]-peakpairs[,1])/peakpairs[,1] # after you are happy with the pairs you have (at least 4 for the cubic # spline and 3 for the quadratic) the information can be written # into a file write.table(file="peaks.csv",data.frame(Reference = peakpairs[,2],Misaligned = peakpairs[,1],Correlation=peakpairs[,4],PctDiff=pctdiffs),row.names=F,col.names=T,sep=",",quote=F) q("no")