# install and load pacman for package managementif (!require("pacman", character.only =TRUE)) install.packages("pacman")
Loading required package: pacman
Warning: package 'pacman' was built under R version 4.2.3
library(pacman)# load libraries using pacmanp_load("easypackages","MASS","ISLR","arm","rtweet","tidyverse","RColorBrewer","tidytext","data.table","tidyr","coefplot2")
Installing package into 'C:/Users/Rebecca Larsen/AppData/Local/R/win-library/4.2'
(as 'lib' is unspecified)
Warning: package 'coefplot2' is not available for this version of R
A version of this package for your version of R might be available elsewhere,
see the ideas at
https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.2:
cannot open URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.2/PACKAGES'
Warning: 'BiocManager' not available. Could not check Bioconductor.
Please use `install.packages('BiocManager')` and then retry.
Warning in p_install(package, character.only = TRUE, ...):
Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
logical.return = TRUE, : there is no package called 'coefplot2'
Warning in p_load("easypackages", "MASS", "ISLR", "arm", "rtweet", "tidyverse", : Failed to install/load:
coefplot2
Load Data
## Load datasets from MASS and ISLR packagesattach(Boston)
# Set the next plot configurationpar(mfrow=c(2,2), main="fit4")
Warning in par(mfrow = c(2, 2), main = "fit4"): "main" is not a graphical
parameter
plot(fit4,pch=20, cex=.8, col="steelblue")mtext("fit4", side =3, line =-2, cex =2, outer =TRUE)
# Uses coefplot to plot coefficients. Note the line at 0.par(mfrow=c(1,1))arm::coefplot(fit4)
### Nonlinear terms and Interactionsfit5=lm(medv~lstat*age,Boston) # include both variables and the interaction term x1:x2summary(fit5)
Call:
lm(formula = medv ~ lstat * age, data = Boston)
Residuals:
Min 1Q Median 3Q Max
-15.806 -4.045 -1.333 2.085 27.552
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 36.0885359 1.4698355 24.553 < 2e-16 ***
lstat -1.3921168 0.1674555 -8.313 8.78e-16 ***
age -0.0007209 0.0198792 -0.036 0.9711
lstat:age 0.0041560 0.0018518 2.244 0.0252 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 6.149 on 502 degrees of freedom
Multiple R-squared: 0.5557, Adjusted R-squared: 0.5531
F-statistic: 209.3 on 3 and 502 DF, p-value: < 2.2e-16
## I() identity function for squared term to interpret as-is## Combine two command lines with semicolonfit6=lm(medv~lstat +I(lstat^2),Boston); summary(fit6)
Call:
lm(formula = medv ~ lstat + I(lstat^2), data = Boston)
Residuals:
Min 1Q Median 3Q Max
-15.2834 -3.8313 -0.5295 2.3095 25.4148
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 42.862007 0.872084 49.15 <2e-16 ***
lstat -2.332821 0.123803 -18.84 <2e-16 ***
I(lstat^2) 0.043547 0.003745 11.63 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 5.524 on 503 degrees of freedom
Multiple R-squared: 0.6407, Adjusted R-squared: 0.6393
F-statistic: 448.5 on 2 and 503 DF, p-value: < 2.2e-16
Sales CompPrice Income Advertising
Min. : 0.000 Min. : 77 Min. : 21.00 Min. : 0.000
1st Qu.: 5.390 1st Qu.:115 1st Qu.: 42.75 1st Qu.: 0.000
Median : 7.490 Median :125 Median : 69.00 Median : 5.000
Mean : 7.496 Mean :125 Mean : 68.66 Mean : 6.635
3rd Qu.: 9.320 3rd Qu.:135 3rd Qu.: 91.00 3rd Qu.:12.000
Max. :16.270 Max. :175 Max. :120.00 Max. :29.000
Population Price ShelveLoc Age Education
Min. : 10.0 Min. : 24.0 Bad : 96 Min. :25.00 Min. :10.0
1st Qu.:139.0 1st Qu.:100.0 Good : 85 1st Qu.:39.75 1st Qu.:12.0
Median :272.0 Median :117.0 Medium:219 Median :54.50 Median :14.0
Mean :264.8 Mean :115.8 Mean :53.32 Mean :13.9
3rd Qu.:398.5 3rd Qu.:131.0 3rd Qu.:66.00 3rd Qu.:16.0
Max. :509.0 Max. :191.0 Max. :80.00 Max. :18.0
Urban US
No :118 No :142
Yes:282 Yes:258
fit1=lm(Sales~.+Income:Advertising+Age:Price,Carseats) # add two interaction termssummary(fit1)
attach(Carseats)contrasts(Carseats$ShelveLoc) # what is contrasts function?
Good Medium
Bad 0 0
Good 1 0
Medium 0 1
?contrasts
Write an R function to combine lm, plot, & abline functions
### Writing an R function to combine the lm, plot and abline functions to ### create a one step regression fit plot functionregplot=function(x,y){ fit=lm(y~x)plot(x,y, pch=20)abline(fit,col="firebrick")}attach(Carseats)
The following objects are masked from Carseats (pos = 3):
Advertising, Age, CompPrice, Education, Income, Population, Price,
Sales, ShelveLoc, Urban, US
regplot(Price,Sales)
## Allow extra room for additional arguments/specificationsregplot=function(x,y,...){ fit=lm(y~x)plot(x,y,...)abline(fit,col="firebrick")}regplot(Price,Sales,xlab="Price",ylab="Sales",col="steelblue",pch=20)
Next Steps
## Additional note: try out the coefplot2 package to finetune the coefplots##install.packages("coefplot2", repos="http://www.math.mcmaster.ca/bolker/R", type="source")## library(coefplot2)# Exercise # Try other combination of interactive terms# How to interpret interactive terms?# Read: Brambor, T., Clark, W.R. and Golder, M., 2006. Understanding interaction models: Improving empirical analyses. Political analysis, 14(1), pp.63-82.# What are qualitative variables? What class should they be?