# Lab 7A: Machine Learning (Easy)

### Predict with K-Means Cluster Analysis

1. Load the Iris data set.
data(iris)
1. Create a scatterplot matrix colored by species.
library(RColorBrewer)

palette <- brewer.pal(3, "Set2")

plot(
  x = iris[1:4],                            # first four columns of iris
  col = palette[as.numeric(iris$Species)],  # one palette colour per species level
  pch = 19)                                 # solid circle marker

1. View scatterplot of petal length vs width.
plot(
  x = iris$Petal.Length,
  y = iris$Petal.Width)

1. Color scatterplot by species.
plot(
  x = iris$Petal.Length,
  y = iris$Petal.Width,
  col = palette[as.numeric(iris$Species)],  # same species colouring as the matrix plot
  pch = 19)

1. Create K-means clusters.
set.seed(42)  # kmeans() starts from random centers; seed it so cluster numbers repeat
clusters <- kmeans(
  x = iris[, 1:4],  # cluster on the first four columns; Species is left out
  centers = 3,      # number of clusters to find
  nstart = 10)      # run 10 random starts and keep the best one
1. Plot each cluster as a shape and plot centroid of clusters.
plot(
  x = iris$Petal.Length,
  y = iris$Petal.Width,
  pch = clusters$cluster,                   # marker shape encodes the assigned cluster
  col = palette[as.integer(iris$Species)])  # colour encodes the actual species

points(
  clusters$centers[, c("Petal.Length", "Petal.Width")],  # centroid x/y as a two-column matrix
  col = "blue",
  lwd = 4,
  pch = 4)

1. View a matrix of the actual vs. predicted clusters.
table(
  x = clusters[["cluster"]],  # rows: cluster number assigned by kmeans()
  y = iris[["Species"]])      # columns: actual species
##    y
## x   setosa versicolor virginica
##   1      0          2        36
##   2      0         48        14
##   3     50          0         0

### Split Data into Test and Training Set

1. Set the seed to make randomness reproducible
set.seed(42)
1. Randomly sample 100 of 150 row indexes
indexes <- sample(
  x = seq_len(nrow(iris)),  # every row index of iris, instead of a hard-coded 1:150
  size = 100)               # number of rows drawn (without replacement) for training
1. Create a training set from indexes
train <- iris[indexes, ]
1. Create a test set from remaining indexes
test <- iris[-indexes, ]

### Predict using Decision Tree

1. Load the decision tree package
library(tree)
1. Train tree model
treeModel <- tree(  # fit a classification tree; summary() below echoes this call
formula = Species ~ .,  # Species is the response, every other column of train is a predictor
data = train)  # fit on the training rows only; test is held back for evaluation
1. Inspect the model
summary(treeModel)
##
## Classification tree:
## tree(formula = Species ~ ., data = train)
## Variables actually used in tree construction:
## [1] "Petal.Length" "Petal.Width"
## Number of terminal nodes:  4
## Residual mean deviance:  0.05213 = 5.004 / 96
## Misclassification error rate: 0.01 = 1 / 100
1. Plot the model
plot(treeModel)
text(treeModel)

1. Plot the decision boundaries
plot(
  x = iris$Petal.Length,
  y = iris$Petal.Width,
  pch = 19,
  col = palette[as.numeric(iris$Species)],  # one palette colour per species level
  main = "Iris Petal Length vs. Width",
  xlab = "Petal Length (cm)",
  ylab = "Petal Width (cm)")
partition.tree(
  tree = treeModel,
  label = "Species",
  add = TRUE)  # draw the partitions on top of the scatterplot above

1. Predict with the model
treePredictions <- predict(
  object = treeModel,
  newdata = test,
  type = "class")  # return predicted species labels

1. Inspect the prediction accuracy
table(
  x = treePredictions,  # rows: predicted species
  y = test$Species)     # columns: actual species
##             y
## x            setosa versicolor virginica
##   setosa         17          0         0
##   versicolor      0         16         0
##   virginica       0          2        15
library(caret)
1. Evaluate the prediction results
confusionMatrix(
  data = treePredictions,    # predicted species from the tree model
  reference = test$Species)  # actual species of the held-out rows
## Confusion Matrix and Statistics
##
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         17          0         0
##   versicolor      0         16         0
##   virginica       0          2        15
##
## Overall Statistics
##
##                Accuracy : 0.96
##                  95% CI : (0.8629, 0.9951)
##     No Information Rate : 0.36
##     P-Value [Acc > NIR] : < 2.2e-16
##
##                   Kappa : 0.94
##  Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                   1.00            0.8889           1.0000
## Specificity                   1.00            1.0000           0.9429
## Pos Pred Value                1.00            1.0000           0.8824
## Neg Pred Value                1.00            0.9412           1.0000
## Prevalence                    0.34            0.3600           0.3000
## Detection Rate                0.34            0.3200           0.3000
## Detection Prevalence          0.34            0.3200           0.3400
## Balanced Accuracy             1.00            0.9444           0.9714

### Predict using Naive Bayes Classifier

1. Load the e1071 package
library(e1071)
1. Train the model
bayesModel <- naiveBayes(
  formula = Species ~ .,  # Species is the response, all other columns are predictors
  data = train)
1. Inspect the model
summary(bayesModel)
##         Length Class  Mode
## apriori 3      table  numeric
## tables  4      -none- list
## levels  3      -none- character
## call    4      -none- call
1. Predict with the model
bayesPredictions <- predict(
  object = bayesModel,
  newdata = test[, 1:4])  # predictor columns only; Species is withheld
1. Evaluate the prediction results
confusionMatrix(
  data = bayesPredictions,
  reference = test$Species)
## Confusion Matrix and Statistics
##
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         17          0         0
##   versicolor      0         16         1
##   virginica       0          2        14
##
## Overall Statistics
##
##                Accuracy : 0.94
##                  95% CI : (0.8345, 0.9875)
##     No Information Rate : 0.36
##     P-Value [Acc > NIR] : < 2.2e-16
##
##                   Kappa : 0.9099
##  Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                   1.00            0.8889           0.9333
## Specificity                   1.00            0.9688           0.9429
## Pos Pred Value                1.00            0.9412           0.8750
## Neg Pred Value                1.00            0.9394           0.9706
## Prevalence                    0.34            0.3600           0.3000
## Detection Rate                0.34            0.3200           0.2800
## Detection Prevalence          0.34            0.3400           0.3200
## Balanced Accuracy             1.00            0.9288           0.9381

### Predict with Neural Network

1. Load the Neural Network package
library(nnet)
1. Train the model
neuralModel <- nnet(  # fit a neural network classifier on the training set
formula = Species ~ .,  # Species is the response, all other columns are inputs
data = train,
size = 4,  # units in the hidden layer (summary reports a 4-4-3 network)
decay = 0.0001,  # weight decay (summary reports decay=1e-04)
maxit = 500)  # maximum number of training iterations
1. Inspect the model
summary(neuralModel)
## a 4-4-3 network with 35 weights
## options were - softmax modelling  decay=1e-04
##  b->h1 i1->h1 i2->h1 i3->h1 i4->h1
##  -1.88  -6.20  12.82   1.37   0.09
##  b->h2 i1->h2 i2->h2 i3->h2 i4->h2
##   6.61   5.95   8.32 -10.54  -9.20
##  b->h3 i1->h3 i2->h3 i3->h3 i4->h3
##   0.42   0.92   1.12  -2.72  -1.33
##  b->h4 i1->h4 i2->h4 i3->h4 i4->h4
##  -0.41  -0.81  -1.10   2.40   1.15
##  b->o1 h1->o1 h2->o1 h3->o1 h4->o1
##   0.43   0.49   2.17   6.38  -6.22
##  b->o2 h1->o2 h2->o2 h3->o2 h4->o2
##  -3.42  -9.28  17.97  -6.43   3.07
##  b->o3 h1->o3 h2->o3 h3->o3 h4->o3
##   2.98   8.78 -20.14   0.05   3.15
library(NeuralNetTools)
1. Visualize the neural network
plotnet(neuralModel)

1. Predict with the model
neuralPredictions <- predict(
  neuralModel,
  type = "class",       # ask for predicted class labels
  newdata = test[1:4])  # first four columns of test, i.e. everything except Species
1. Evaluate the prediction results
confusionMatrix(
  # predict(type = "class") on an nnet model yields plain labels, while
  # confusionMatrix() expects factors with matching levels, so coerce the
  # predictions to a factor that shares the reference's levels.
  data = factor(neuralPredictions, levels = levels(test$Species)),
  # Plain `$` here: the escaped form `test\$Species` is a syntax error in R.
  reference = test$Species)
## Confusion Matrix and Statistics
##
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         17          0         0
##   versicolor      0         17         0
##   virginica       0          1        15
##
## Overall Statistics
##
##                Accuracy : 0.98
##                  95% CI : (0.8935, 0.9995)
##     No Information Rate : 0.36
##     P-Value [Acc > NIR] : < 2.2e-16
##
##                   Kappa : 0.97
##  Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                   1.00            0.9444           1.0000
## Specificity                   1.00            1.0000           0.9714
## Pos Pred Value                1.00            1.0000           0.9375
## Neg Pred Value                1.00            0.9697           1.0000
## Prevalence                    0.34            0.3600           0.3000
## Detection Rate                0.34            0.3400           0.3000
## Detection Prevalence          0.34            0.3400           0.3200
## Balanced Accuracy             1.00            0.9722           0.9857
1. Set working directory
setwd("C:/Workshop/Data")  # NOTE(review): changes the working directory for the rest of the session; assumes this folder exists
1. Save the tree model
save(treeModel, file = "Tree.RData")  # relative path, so the file lands in the current working directory