# Classifier Comparison

The code below compares six (6) classifiers on the Pima Indians Diabetes dataset. The accuracy of each classifier is estimated with k-fold cross-validation (https://machinelearningmastery.com/k-fold-cross-validation/), and the whole cross-validation is repeated n times on reshuffled data.

```# Run Algorithms for n-Times and Determine the Average Value based on
# Kfold Method
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import shuffle

url = "https://bit.ly/2GX9wC5"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']

# For plotting
fig, ax = plt.subplots()

# prepare models
model1 = LogisticRegression(solver = 'lbfgs',max_iter=1000) #LR
model2 = LinearDiscriminantAnalysis() #LDA
model3 = KNeighborsClassifier(n_neighbors = 3) #KNN
model4 = GaussianNB() #GNB
model5 = DecisionTreeClassifier(max_depth=5) #DT

# evaluate model
resultslr = []
resultslda = []
resultsknn = []
resultsgnb = []
resultsdt = []
resultsqda = []

scoring = 'accuracy'
times = 10             # How many times to repeat

for x in range (times):
# shuffle the data for each times
dataframe = shuffle(dataframe)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]
kfold = KFold(n_splits=10)

# Logic Regression
cv_results = cross_val_score(model1, X, Y, cv=kfold, scoring=scoring)
resultslr.append(cv_results.mean()*100)
# Linear Discriminant Analysis
cv_results = cross_val_score(model2, X, Y, cv=kfold, scoring=scoring)
resultslda.append(cv_results.mean()*100)

# K-Nearest Neighbor
cv_results = cross_val_score(model3, X, Y, cv=kfold, scoring=scoring)
resultsknn.append(cv_results.mean()*100)

# Gaussian Naive Bayes
cv_results = cross_val_score(model4, X, Y, cv=kfold, scoring=scoring)
resultsgnb.append(cv_results.mean()*100)

# Decision Tree
cv_results = cross_val_score(model5, X, Y, cv=kfold, scoring=scoring)
resultsdt.append(cv_results.mean()*100)

cv_results = cross_val_score(model6, X, Y, cv=kfold, scoring=scoring)
resultsqda.append(cv_results.mean()*100)

# Plot the result
t = np.arange(1, times+1, 1)        # to plot from 1 to n-Times
ax.plot(t,resultslr,t,resultslda,t,resultsknn,t,resultsgnb,t,resultsdt,t,resultsqda)

ax.set(xlabel='times', ylabel='Accuracy (%)',
title='Pima Indian Database')
ax.grid()
ax.set_xlim(1, times)
ax.set_ylim(60, 80)
ax.legend(['Logic Regression','Linear Discriminant Analysis','K-Nearest Neighbor','Gaussian Naive Bayes','Decision Tree','Quadratic Discriminant Analysis'])
#plt.xlabel('times')
#plt.ylabel('Accuracy(%)')
#plt.title('Pima Indian Databases')
#plt.grid(True)
plt.show()```