rm(list=ls()) # this is cubic-r-program.r # Used to perform cubic spline alignment # alldata.filename is the csv file that requires alignment # small.numaber.of.peaks.filename is the csv file # that contains the target (reference) m/z values in the first column # and the actual (misaligned) m/z values in the second column # the name of the file should end in .csv, otherwise there may be problems. alldata.filename = "alldata.csv"; small.number.of.peaks.filename = "peaks-spline.csv"; # read in the two files startdata = read.csv(file=alldata.filename, header = TRUE, sep = ",", quote="\"", dec=".", fill = TRUE) peakdata = read.csv(file=small.number.of.peaks.filename, header = TRUE, sep = ",", quote="\"", dec=".", fill = TRUE) # the first column is molecular weight vector # the second column holds the associated intensities wvec = startdata[,1] intensities = startdata[,2] # target mass values are mis (perhaps from reference spectrum) # actual mass values are pis mis = peakdata[,1] pis = peakdata[,2] # try to make sure only valid numeric values are input mis = mis[is.numeric(mis) & !is.na(mis)] pis = pis[is.numeric(pis) & !is.na(pis)] # a function for interpolating used below meanmass = function(y2,y1,x2,x1,val){ p = (val-x1)/(x2-x1) (1-p)*y1+p*y2 } # fitting the spline yis = pis/mis ss = smooth.spline(pis,yis) cbind(pis,mis,yis,predict(ss,pis)$y) # evaluating the f_lambda function for all m/z tempwvec = predict(ss,wvec) # the next block of code performs interpolation and attends to the # potential problem that interpolating at the end might not # be possible newwvec = tempwvec$x/tempwvec$y lennew = length(newwvec) newintensities = rep(NA,lennew) startcopies = which(wvec <= min(newwvec)) endcopies = which(wvec >= max(newwvec)) newintensities[startcopies] = intensities[1] newintensities[endcopies] = intensities[lennew] minpos = if(length(startcopies) > 0) { minpos = max(startcopies)+1 } else { minpos = 1 } maxpos = if(length(endcopies) > 0) { maxpos = min(endcopies)-1 } else { maxpos = lennew } j = 1 for(i in (minpos:maxpos)){ continue = TRUE while(continue && (j < lennew)){ if(wvec[i] >= newwvec[j] && wvec[i] < newwvec[j+1]) { newintensities[i] = meanmass(intensities[j+1],intensities[j], newwvec[j+1],newwvec[j],wvec[i]) continue = FALSE } else { j = j + 1 } } } if(minpos > 1) {newintensities[startcopies] = newintensities[minpos]}else{} if(maxpos < lennew) {newintensities[endcopies] = newintensities[maxpos]}else{} # # # one can remove the comment delimeters for the following # plot and lines commands to see how well spline fits the data # at the given pis -- a poor fit suggests # more intermediate points may be necessary # # a bad fit might be indicated if one point shows # a much different pis/mis ratio (e.g. all points # except one with ratios near .995 and with one point showing 1.05) # # plot(pis,yis,type="b") #lines(predict(ss,seq(min(pis),max(pis),len=20)),col=3,type="l") # this writes out a new file with name augmented by a # suffix of "-new.csv" instead of ".csv" # the initial part of the file name matches that of # alldata.filename newfilename = sub("\.csv","-new\.csv",alldata.filename) write.table(round(cbind(wvec,newintensities),digits=5),file=newfilename,sep=",",col.names=c("M/Z","Intensity"),row.names=FALSE) q("no")