#!/usr/bin/Rscript

# Copyright (c) 2014,
# Mathias Kuhring, KuhringM@rki.de, Robert Koch Institute, Germany, 
# All rights reserved. For details, please note the license.txt.

# surankco-feature: feature generation from contigs (ACE format) and 
#                   corresponding reads (QUAL or FASTQ format)


# get script path
# Determine the directory this script lives in, so that the bundled R sources
# and the jar can be located relative to it. The script file is looked up in
# the full command line via its "--file=" argument.
args <- commandArgs(trailingOnly = FALSE)
script.arg <- "--file="
# Only accept arguments that START with the prefix; an unanchored match could
# also hit user arguments that merely contain "--file=" somewhere.
script.matches <- grep(paste0("^", script.arg), args, value = TRUE)
if (length(script.matches) > 0) {
  # use the first match only, so script.path is always a single string
  script.name <- sub(script.arg, "", script.matches[1], fixed = TRUE)
  script.path <- dirname(script.name)
} else {
  # No "--file=" argument (e.g. interactive session): fall back to the working
  # directory instead of producing an empty path, which would make the
  # source() calls below resolve to "/r/...".
  script.name <- character(0)
  script.path <- getwd()
}


# testing/debugging
# args <- c("--directory=data", "--split.regex=\\.\\d+-")
# script.path <- getwd()
DEBUGGING <- FALSE


# sources and libraries
source(paste(script.path, '/r/parameter.R', sep=""))
source(paste(script.path, '/r/feature.R', sep=""))
source(paste(script.path, '/r/functions.R', sep=""))
source(paste(script.path, '/r/coverage.R', sep=""))
source(paste(script.path, '/r/covcurv.R', sep=""))
source(paste(script.path, '/r/import.R', sep=""))
loadPackages(c("optparse","parallel"), quietly=TRUE)


# parsing parameter
# parse the command line parameters and pick up the table of input files
cat("prepare files\n")
parameters <- parseSurankcoFeature()
files <- parameters$files

# when debugging, dump the raw arguments, the parsed parameters and the files
if (DEBUGGING) {
  invisible(lapply(list(args, parameters, files), print))
}


# input files: one column of 'files' per format, selected by the parsed formats
input.file.assembly <- files[, parameters$assembly.format]
input.file.quality <- files[, parameters$read.quality.format]
# temporary output files, named after the row names of 'files'
output.file.features <- paste0(rownames(files), ".feature.tmp")
output.file.coverage <- paste0(rownames(files), ".coverage.tmp")
output.file.ccoverage <- paste0(rownames(files), ".ccoverage.tmp")

# extract features in java and prepare/export data for R
# (one java run per row of 'files', executed sequentially)
# multicore?
cat("extract features (part 1): ")

# The command line is built as a single string, so file paths have to be
# quoted; otherwise paths with spaces or shell metacharacters are split or
# misinterpreted. Windows expects double-quote style quoting.
java.quote.type <- if (.Platform$OS.type == "windows") "cmd" else "sh"
quotePath <- function(path) {
  shQuote(as.character(path), type = java.quote.type)
}

# seq_len() yields an empty loop for zero rows (1:0 would iterate over 1, 0)
for (i in seq_len(nrow(files))) {
  cat(paste0(i, " "))
  # NOTE(review): the non-path parameters (fastq version, split regex, kmer
  # switch, size filter) are passed on unquoted exactly as before; whether
  # parseSurankcoFeature() already escapes them is not visible here -- confirm.
  java.call <- paste("java",
                     paste0("-Xms", parameters$memory, "G"),
                     paste0("-Xmx", parameters$memory, "G"),
                     "-Dcom.ibm.tools.attach.enable=no -jar",
                     quotePath(paste(script.path, "surankco.jar", sep = "/")),
                     quotePath(input.file.assembly[i]),   # args[0] Assembly File
                     quotePath(input.file.quality[i]),    # args[1] Fasta/Qual File
                     quotePath(output.file.features[i]),  # args[2] Tmp Feature Output File
                     quotePath(output.file.coverage[i]),  # args[3] Tmp Coverage Output File
                     quotePath(output.file.ccoverage[i]), # args[4] Tmp CCoverage Output File
                     parameters$fastq.version,            # args[5] Fastq version
                     parameters$split.regex,              # args[6] Read name split regex
                     parameters$kmer.features,            # args[7] kmer on/off
                     parameters$contig.size.filter)       # args[8] minimum contig size

  if (DEBUGGING) {
    print(java.call)
  }

  # system() returns the exit status of the command; anything but 0 is a failure
  code <- system(java.call, ignore.stdout = FALSE)
  if (code != 0) {
    complainAndStop("java not successfully executed", code)
  }
}
cat("\n")

cat("extract features (part 2)\n")
# import the java features and prepared data (coverages), then attach the
# expected genome size to the imported data
features.raw <- attachEGS(
  importFeatures(input.file.assembly,
                 input.file.quality,
                 output.file.features,
                 output.file.coverage,
                 output.file.ccoverage),
  parameters$expected.genome.size
)

# extract features in R; the expected genome size is already attached above
# and therefore not passed here
features.final <- extractFeatures(features.raw, parameters$threads)

# export features as tab-separated text, one file per row of 'files'
cat("export features\n")
final.file.features <- paste0(rownames(files), ".features.txt")
# seq_len() keeps the loop empty when 'files' has no rows; 1:nrow(files) would
# iterate over 1 and 0 and index non-existing feature tables
for (i in seq_len(nrow(files))) {
  write.table(features.final[[i]], file = final.file.features[i],
              sep = "\t", dec = ".", col.names = TRUE, row.names = FALSE)
}

# delete the temporary java output files, unless they are kept for debugging
if (!DEBUGGING) {
  cat("remove temporary files\n")
  unlink(c(output.file.features,
           output.file.coverage,
           output.file.ccoverage))
}

# done
cat("surankco-feature calculations done\n")
