# What's the structure of the dataset?
# str() prints a compact overview: the kind of object, its size, and each
# column with its type and first few values.
str(cars)
# What are the variable names? (Column names in R)
colnames(cars)
# What are the dimensions? (Number of rows, then number of columns)
dim(cars)
# Number of rows on its own
nrow(cars)
# Number of columns on its own
ncol(cars)
# R couldn't find a name for the first column, so it named it 'X'.
# It holds the car names, so rename it to 'name'. Assigning into
# colnames(cars)[1] replaces only that one name and leaves the others alone.
colnames(cars)[1] <- "name"
# Check the result: the first column of the preview should now be "name"
head(cars)
# Getting help
# '?topic' and help("topic") both open the manual page for a topic.
# The manual page for help() itself:
?help
# The manual page for read.csv():
help("read.csv")
# Indexing
# A data frame is indexed as [row, column].
# Single cell: row 1, column 3
cars[1, 3]
# Rows 2 through 5 of column 6
cars[2:5, 6]
# An empty row index selects every row: all of column 1
cars[, 1]
# An empty column index selects every column: all of row 4
cars[4, ]
# For data frames, '$' followed by a column name also extracts a column.
# List the column names first to see which ones are available.
colnames(cars)
cars$mpg
cars$wt
# Analyzing Data
# Per-variable summary of every column
summary(cars)
# Frequency table: how many cars there are for each value of gear
table(cars$gear)
# Exercises
# What is the mean weight of the cars?
# What is the correlation between the weight and the mpg?
# What is the horsepower of the Cadillac Fleetwood?
# How many cars have six cylinders?
# Exercise Answers
# What is the mean weight of the cars?
# Multiplying by 1000 reports the mean in pounds; this assumes wt is stored in
# thousands of pounds, which is how the rest of this script treats it.
mean(cars$wt) * 1000
# What is the correlation between the weight and the mpg?
cor(cars$mpg, cars$wt)
# What is the horsepower of the Cadillac Fleetwood?
# Pick out hp for the row whose name matches, instead of printing the whole
# data frame and searching it by eye.
cars$hp[cars$name == "Cadillac Fleetwood"]
# How many cars have six cylinders?
# Read the count under the "6" heading of the frequency table.
table(cars$cyl)
# Creating vectors and matrix
# c() combines its arguments into a vector.
vector1 <- c(1, 2, 3, 4)
vector1
# Reshape the vector into a matrix with 2 rows. byrow = TRUE fills the matrix
# one row at a time, so the rows are (1, 2) and (3, 4).
matrix1 <- matrix(vector1, nrow = 2, byrow = TRUE)
matrix1
# Operators: Examples
# Arithmetic Operators
2 + 5
# Logical Operators
# A comparison evaluates to TRUE or FALSE.
6 < 3
# String comparison is case-sensitive: the first is FALSE, the second is TRUE.
"hello" == "HELLO"
"hello" == "hello"
# Manipulating Data
# A comparison on a column gives one TRUE/FALSE per row; used as an index,
# it selects the rows (or elements) where the result is TRUE.
# Which cars have vs equal to 1?
cars[cars$vs == 1, ]
# Which cars have at least 6 cylinders?
cars[cars$cyl >= 6, ]
# Names of the cars with more than 100 horsepower
cars$name[cars$hp > 100]
# Names of the cars heavier than 3000 lb (the threshold is written as 3
# because wt is compared in thousands of pounds)
cars$name[cars$wt > 3]
# Mean mpg of the 4-cylinder cars
mean(cars$mpg[cars$cyl == 4])
# Mean mpg of the 6-cylinder cars
mean(cars$mpg[cars$cyl == 6])
# Transforming Variables
# Weight in pounds (wt is treated as thousands of pounds).
wt.lb <- cars$wt * 1000
# Linear Regression Example (OLS)
# Dependent variable: mpg
# Independent variables: weight, horsepower, number of cylinders
# Column names are written bare and resolved through data = cars, so the
# coefficients are labelled "hp" and "cyl" rather than "cars$hp" and
# "cars$cyl". wt.lb is not a column of cars; lm() picks it up from the
# workspace where the formula was written.
regr <- lm(mpg ~ wt.lb + hp + cyl, data = cars)
# Coefficient table, standard errors, R-squared, and related statistics
summary(regr)
# The new object "regr" is of a special class: lm
class(regr)
# Here are the things in "regr" that we can look at
names(regr)
# We can look at the things in the object using
# the '$'
regr$residuals
regr$coefficients