rm(list=ls())

# this is cubic-r-program.r
# Used to perform cubic spline alignment
# alldata.filename is the csv file that requires alignment
# small.numaber.of.peaks.filename is the csv file
# that contains the target (reference) m/z values in the first column
# and the actual (misaligned) m/z values in the second column


# the name of the file should end in .csv, otherwise there may be problems.
alldata.filename = "alldata.csv";
small.number.of.peaks.filename = "peaks-spline.csv";

# read in the two files
startdata = read.csv(file=alldata.filename, header = TRUE, sep = ",", quote="\"", dec=".", fill = TRUE)


peakdata = read.csv(file=small.number.of.peaks.filename, header = TRUE, sep = ",", quote="\"", dec=".", fill = TRUE)

# the first column is molecular weight vector
# the second column holds the associated intensities
wvec = startdata[,1]
intensities = startdata[,2]

# target mass values are mis (perhaps from reference spectrum)
# actual mass values are pis
mis = peakdata[,1]
pis = peakdata[,2]

# try to make sure only valid numeric values are input
mis = mis[is.numeric(mis) & !is.na(mis)]
pis = pis[is.numeric(pis) & !is.na(pis)]


# a function for interpolating used below
meanmass = function(y2,y1,x2,x1,val){
  p = (val-x1)/(x2-x1)
  (1-p)*y1+p*y2
}


# fitting the spline
yis = pis/mis
ss = smooth.spline(pis,yis)
cbind(pis,mis,yis,predict(ss,pis)$y)


# evaluating the f_lambda function for all m/z
tempwvec = predict(ss,wvec)

# the next block of code performs interpolation and attends to the
# potential problem that interpolating at the end might not
# be possible
newwvec = tempwvec$x/tempwvec$y
lennew = length(newwvec)
newintensities = rep(NA,lennew)


startcopies = which(wvec <= min(newwvec))
endcopies = which(wvec >= max(newwvec))

newintensities[startcopies] = intensities[1]
newintensities[endcopies] = intensities[lennew]


minpos = if(length(startcopies) > 0) {
  minpos  = max(startcopies)+1 } else {
  minpos = 1
}

maxpos = if(length(endcopies) > 0) {
  maxpos = min(endcopies)-1 } else {
  maxpos = lennew
}

j = 1


for(i in (minpos:maxpos)){
  continue = TRUE
  while(continue && (j < lennew)){
    if(wvec[i] >= newwvec[j] && wvec[i] < newwvec[j+1]) {
      newintensities[i] = meanmass(intensities[j+1],intensities[j],
                                   newwvec[j+1],newwvec[j],wvec[i])
      continue = FALSE
    } else {
      j = j + 1
    }  
  }
}


if(minpos > 1) {newintensities[startcopies] = newintensities[minpos]}else{}

if(maxpos < lennew) {newintensities[endcopies] = newintensities[maxpos]}else{}


#
#
# one can remove the comment delimeters for the following
# plot and lines commands to see how well spline fits the data
# at the given pis -- a poor fit suggests
# more intermediate points may be necessary
#
# a bad fit might be indicated if one point shows
# a much different pis/mis ratio (e.g. all points
# except one with ratios near .995 and with one point showing 1.05)
#
# plot(pis,yis,type="b")
#lines(predict(ss,seq(min(pis),max(pis),len=20)),col=3,type="l")


# this writes out a new file with name augmented by a
# suffix of "-new.csv" instead of ".csv"
# the initial part of the file name matches that of
# alldata.filename
newfilename = sub("\.csv","-new\.csv",alldata.filename)
write.table(round(cbind(wvec,newintensities),digits=5),file=newfilename,sep=",",col.names=c("M/Z","Intensity"),row.names=FALSE)

q("no")