# Movies Example -- source code file movies.r
#
# Reads the movies data set, recodes the two categorical columns as 0/1
# dummy variables, runs backward stepwise selection on a linear model of
# opening-weekend gross, refits the chosen model, and writes five
# diagnostic plots (A-E) to movies.pdf.
#
# Input:  movies2.txt in the working directory, with a header row and the
#         columns title, opening, budget, isstar and issum.
# Output: printed data frames and model summary; movies.pdf.

# Read and print data frame.
movies <- read.table("movies2.txt", header = TRUE)
cat("movies data frame:\n")
print(movies)

# Create new data frame using dummy variables.
# Each dummy is 1 when the row carries the named level and 0 otherwise.
star_dum <- as.numeric(movies$isstar == "Star")
sum_dum <- as.numeric(movies$issum == "Summer")
with_dummy_vars <- data.frame(
  title = movies$title,
  opening = movies$opening,
  budget = movies$budget,
  star_dum = star_dum,
  sum_dum = sum_dum
)
cat("with_dummy_vars data frame:\n")
print(with_dummy_vars)

# Perform backward selection to pick best model.
fullmodel <- lm(opening ~ budget + star_dum + sum_dum,
                data = with_dummy_vars)
print(summary(step(fullmodel, direction = "backward")))

# Obtain best model after looking at step summary.
# NOTE(review): this formula is hard-coded from a manual reading of the
# step() output; re-check it if the input data changes.
model <- lm(opening ~ budget + star_dum, data = with_dummy_vars)

# Create scatterplots and residual plots.
r <- residuals(model)
p <- fitted(model)

# Columns are referenced explicitly through with_dummy_vars$ rather than
# attach(), so nothing is left on the search path after the script ends.
pdf("movies.pdf")

# Plot A
plot(with_dummy_vars$budget, with_dummy_vars$opening,
     main = "Plot A: Scatterplot Using Plotting Symbol star_dum",
     xlab = "Movie Budget", ylab = "Opening Weekend Gross",
     pch = as.character(star_dum))

# Plot B
plot(with_dummy_vars$budget, with_dummy_vars$opening,
     main = "Plot B: Scatterplot Using Plotting Symbol sum_dum",
     xlab = "Movie Budget", ylab = "Opening Weekend Gross",
     pch = as.character(sum_dum))

# Plot C
plot(p, r,
     main = "Plot C: Residual Plot Using Plotting Symbol star_dum",
     xlab = "Predicted Values", ylab = "Residuals",
     pch = as.character(star_dum))
abline(h = 0, lty = "dashed", col = "red")

# Plot D
plot(with_dummy_vars$budget, r,
     main = "Plot D: Residual Plot Using Plotting Symbol star_dum",
     xlab = "Movie Budget", ylab = "Residuals",
     pch = as.character(star_dum))
abline(h = 0, lty = "dashed", col = "red")

# Plot E
# This plot is not very useful, but it shows how
# to plot residuals vs. a dummy variable.
plot(sum_dum, r,
     main = "Plot E: Residual Plot Using Plotting Symbol star_dum",
     xlab = "Dummy Variable for issummer", ylab = "Residuals",
     pch = as.character(star_dum))
abline(h = 0, lty = "dashed", col = "red")

# Close the PDF device so movies.pdf is flushed to disk.
dev.off()