# Lab 5A: Statistical Modeling (Easy)

1. Load the Iris data set.
data(iris)
1. Peek at the data.
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
1. Look at unique species.
unique(iris$Species)
## [1] setosa versicolor virginica
## Levels: setosa versicolor virginica

### Create a Gaussian Distribution Model

1. Create a plot of sepal width
plot(density(iris$Sepal.Width))

1. Get the mean
irisMean <- mean(iris$Sepal.Width)
1. Print the mean
print(irisMean)
## [1] 3.057333
1. Get the standard deviation
irisStdDev <- sd(iris$Sepal.Width)
1. Print the standard deviation
print(irisStdDev)
## [1] 0.4358663
1. Create points along x-axis of the distribution
# Build 100 evenly spaced x-axis points spanning the observed range of
# sepal widths. `length.out` is spelled out in full rather than relying on
# partial argument matching of `length`.
distributionX <- seq(
  from = min(iris$Sepal.Width),
  to = max(iris$Sepal.Width),
  length.out = 100
)
1. Compute the y-axis height of each point
# Normal density evaluated at every point in distributionX, using the
# mean and standard deviation stored in irisMean and irisStdDev.
distributionY <- dnorm(distributionX, mean = irisMean, sd = irisStdDev)
1. Add the distribution to the plot
plot(density(iris$Sepal.Width))
lines(
  x = distributionX,
  y = distributionY,
  col = "red")
1. Generate/predict new values from model
values <- rnorm(
  n = 10000,
  mean = mean(iris$Sepal.Width),
  sd = sd(iris$Sepal.Width))
1. Add plot of distribution of generated values
plot(density(iris$Sepal.Width))
plot(density(iris$Sepal.Width))
lines(
  x = distributionX,
  y = distributionY,
  col = "red")
lines(
  x = density(values),
  col = "blue")
1. Get mean of generated values
mean(values)
## [1] 3.059143
1. Get standard deviation of generated values
sd(values)
## [1] 0.4397619

### Create a Simple Linear Regression Model

1. Create a scatterplot matrix.
plot(iris[1:4])
1. Create a scatterplot of petal length vs width.
plot(
  x = iris$Petal.Length,
  y = iris$Petal.Width)
1. Create a linear regression model.
model <- lm(
  formula = Petal.Width ~ Petal.Length,
  data = iris)
1. Draw linear regression model on the scatterplot.
plot(
  x = iris$Petal.Length,
  y = iris$Petal.Width)
lines(
  x = iris$Petal.Length,
  y = model$fitted,
  col = "red",
  lwd = 3)
1. Get the correlation coefficient.
cor(
  x = iris$Petal.Length,
  y = iris$Petal.Width)
## [1] 0.9628654
1. Summarize the model.
summary(model)
##
## Call:
## lm(formula = Petal.Width ~ Petal.Length, data = iris)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -0.56515 -0.12358 -0.01898  0.13288  0.64272
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -0.363076   0.039762  -9.131  4.7e-16 ***
## Petal.Length  0.415755   0.009582  43.387  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2065 on 148 degrees of freedom
## Multiple R-squared:  0.9271, Adjusted R-squared:  0.9266
## F-statistic:  1882 on 1 and 148 DF,  p-value: < 2.2e-16
1. Create new petal lengths to predict.
# One-column data frame holding the three new petal lengths; the column is
# named Petal.Length.
unknownLengths <- data.frame(Petal.Length = c(2, 5, 7))
1. Predict new unknown values from the model.
# Apply the fitted model to the rows of unknownLengths and print the
# predicted values.
predict(model, newdata = unknownLengths)
##         1         2         3
## 0.4684353 1.7157016 2.5472124