package rs.ac.bg.fon.ai.dataPreparation;

import java.text.DecimalFormat;
import java.util.Random;

import weka.classifiers.Evaluation;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

/**
 * Demonstrates a percentage (hold-out) split: the full dataset is shuffled
 * with a seeded random generator, the first 80% of the shuffled instances
 * become the training set and the remaining 20% the test set. A J48 decision
 * tree and a Naive Bayes classifier are trained on the training set and their
 * accuracy on the test set is printed to standard output.
 */
public class PercentageSplit {

	/** Path of the ARFF file that contains the full dataset. */
	private static final String FILE_NAME = "data/diabetes.arff";

	/** Seed of the random generator; fixed so that the results can be replicated. */
	private static final int SEED = 2;

	/** Share of the instances that goes into the training set (the rest is the test set). */
	private static final double TRAIN_RATIO = 0.8;

	/**
	 * Loads the dataset, splits it into train and test sets, builds both
	 * classifiers and prints the percentage of correctly classified test instances.
	 *
	 * @param args command line arguments (not used)
	 * @throws Exception propagated from the Weka calls that load the data,
	 *                   build the classifiers or evaluate them
	 */
	public static void main(String[] args) throws Exception {

		// read the full dataset; the last attribute is used as the class attribute
		DataSource loader = new DataSource(FILE_NAME);
		Instances dataset = loader.getDataSet();
		dataset.setClassIndex(dataset.numAttributes() - 1);

		// shuffle a copy of the data so that the original dataset stays untouched;
		// the generator is seeded so that the same split is produced on every run
		Random rand = new Random(SEED);
		Instances randData = new Instances(dataset);
		randData.randomize(rand);

		// the first TRAIN_RATIO share of the shuffled data is the train set,
		// everything after it is the test set
		int trainSize = (int) Math.round(randData.numInstances() * TRAIN_RATIO);
		int testSize = randData.numInstances() - trainSize;
		Instances train = new Instances(randData, 0, trainSize);
		Instances test = new Instances(randData, trainSize, testSize);

		// create classifiers
		J48 dtClsf = new J48();
		dtClsf.buildClassifier(train);

		NaiveBayes nbClsf = new NaiveBayes();
		nbClsf.setUseSupervisedDiscretization(true);
		nbClsf.buildClassifier(train);

		// evaluate the classifiers with the test data; the Evaluation objects are
		// initialized with the TRAIN set so that the class priors they keep are
		// taken from the training data and not from the test data
		// (this does not affect pctCorrect(), only the prior-based statistics)
		Evaluation dtEval = new Evaluation(train);
		dtEval.evaluateModel(dtClsf, test);

		Evaluation nbEval = new Evaluation(train);
		nbEval.evaluateModel(nbClsf, test);

		// comparing the accuracy of the two models
		DecimalFormat formatter = new DecimalFormat();
		System.out.println("Accuracy:");
		System.out.println("Decision tree: " + formatter.format(dtEval.pctCorrect()));
		System.out.println("Naive Bayes: " + formatter.format(nbEval.pctCorrect()));
	}
}
