# Tune a classifier with 5-fold cross-validation over a grid of tuning
# parameter values, running caret's resampling on a socket cluster.
#
# Arguments:
#   seed       - value passed to set.seed() right before training
#   nclust     - number of worker processes in the snow socket cluster
#   train.data - training data; must contain the outcome column `Label`
#                (the model formula is `Label ~ .`)
#   ml.method  - model identifier passed as `method` to caret::train()
#   grid.spec  - tuning grid passed as `tuneGrid` to caret::train()
#
# Returns the object produced by caret::train(). As a side effect, the
# elapsed training time is printed (with its time unit).
cross_validate_classifier <- function(seed,
                                      nclust,
                                      train.data,
                                      ml.method,
                                      grid.spec) {
  # library() stops with a clear error if a package is missing, whereas
  # require() would only return FALSE and let a later call fail obscurely.
  library(caret)
  library(doSNOW)

  # Set up the CV process: 5 folds, grid search over `grid.spec`
  cv.cntrl <- trainControl(method = "cv", number = 5, search = "grid")

  # Create a socket ("SOCK") cluster of nclust R worker processes and
  # register it as the parallel backend used by caret.
  cl <- makeCluster(nclust, type = "SOCK")
  # Guarantee the workers are shut down even if train() fails; otherwise
  # the worker processes would be left running after an error.
  on.exit(stopCluster(cl), add = TRUE)
  registerDoSNOW(cl)

  # Track the time of the code execution
  start.time <- Sys.time()

  set.seed(seed)
  model.cv <- train(Label ~ .,
                    data = train.data,
                    method = ml.method,
                    trControl = cv.cntrl,
                    tuneGrid = grid.spec)

  # format() keeps the unit (secs / mins / hours) that difftime picked
  # automatically; pasting the raw difftime would print a bare number.
  total.time <- Sys.time() - start.time
  print(paste("Total processing time:", format(total.time)))

  # Return the results
  model.cv
}

# Relative (normalized) term frequency (TF) of a single document.
# `row` is one row of a document-term matrix (DTM), i.e. the term counts of
# one document; every count is divided by the sum of the row.
# Note: an all-zero row yields NaN entries (0 / 0).
relative.term.frequency <- function(row) {
  total.count <- sum(row)
  row / total.count
}

# Inverse document frequency (IDF) of a single term.
# Formula: log10(corpus.size / doc.with.term.count)
# `col` is one column of a document-term matrix (DTM), i.e. the counts of one
# term across all documents, so its length equals the number of documents.
# Entries that are NA are not counted as documents containing the term.
# A term that occurs in no document yields Inf (division by zero).
inverse.doc.freq <- function(col) {
  n.docs <- length(col)                           # corpus size = rows in DTM
  n.docs.with.term <- sum(col > 0, na.rm = TRUE)  # documents containing the term
  log10(n.docs / n.docs.with.term)
}

# TF-IDF weighting: element-wise product of `x` and `idf`.
# `x` is presumably a vector of term frequencies and `idf` the matching
# inverse document frequencies (confirm against the caller).
tf.idf <- function(x, idf) {
  weighted <- x * idf
  weighted
}


# Log of the harmonic mean of a set of likelihoods, given their logarithms.
#
# Computes  llMed - log(mean(exp(-(logLikelihoods - llMed)))),  which is
# algebraically equal to  -log(mean(exp(-logLikelihoods))).
# The values are shifted by their median and the exponentials are evaluated
# with Rmpfr multiple-precision numbers, to avoid overflow/underflow in
# exp() for log-likelihoods of large magnitude.
#
# Arguments:
#   logLikelihoods - numeric vector of log-likelihood values
#   precision      - number of bits of precision passed to mpfr() as `prec`
#
# Returns a double.
harmonicMean <- function(logLikelihoods, precision = 2000L) {
  # library() stops with a clear error when Rmpfr is not installed, whereas
  # require() would return FALSE and fail later with "could not find
  # function mpfr". The package is attached (not just loaded) so that its
  # methods for mean/exp/log on mpfr objects are visible.
  library("Rmpfr")
  llMed <- median(logLikelihoods)
  shifted <- -mpfr(logLikelihoods, prec = precision) + llMed
  as.double(llMed - log(mean(exp(shifted))))
}
