/* Movies Example -- source code file movies.sas                      */
/*                                                                    */
/* Reads the movies data, builds 0/1 dummy variables for the star and */
/* summer indicators, plots opening against budget, lists fit         */
/* statistics for the candidate regressions, then fits                */
/* opening = budget star_dum and plots its residuals.                 */

options linesize=70 nodate;

/* ---- Read the raw data ------------------------------------------- */
data movies;
    /* firstobs=2 starts reading at the second record of the file. */
    infile "c:/datasets/movies.txt" firstobs=2;

    /* title occupies columns 1-20; the remaining fields are read in
       list style, isstar and issummer as character values. */
    input title $ 1-20
          opening
          budget
          isstar $
          issummer $;

    /* Dummy variables: a comparison evaluates to 1 when true, 0 when
       false. */
    star_dum = (isstar eq "Star");
    sum_dum  = (issummer eq "Summer");

    /* Symbol numbers for the plots.  The dummies are shifted up by one
       because a symbol0 statement cannot be defined, so a dummy of 0
       maps to symbol1 and a dummy of 1 maps to symbol2. */
    star_sym = star_dum + 1;
    sum_sym  = sum_dum + 1;

    /* The original character indicators are not kept in the output. */
    drop isstar issummer;
run;

proc print data=movies;
run;

/* ---- Plot opening vs. budget, marked by each dummy variable ------ */
goptions reset=all;

/* Plotting character "0" in blue for symbol number 1 (dummy = 0) and
   "1" in red for symbol number 2 (dummy = 1). */
symbol1 value="0" color=blue;
symbol2 value="1" color=red;

proc gplot data=movies;
    plot opening * budget = star_sym;
    plot opening * budget = sum_sym;
run;
quit;

/* ---- Compute all regression models ------------------------------- */
/* Select the best model using adjusted R-squared; AIC, BIC and Cp    */
/* are requested alongside it.                                        */
proc rsquare data=movies adjrsq aic bic cp;
    model opening = budget star_dum sum_dum;
run;

/* ---- Perform more detailed analysis of the best model ------------ */
proc reg data=movies;
    model opening = budget star_dum;

    /* Save the residuals and predicted values in output data set out. */
    output out=out
           r=residuals
           p=predicted;
run;
quit;

proc print data=out;
run;

/* ---- Residual plots, using star_sym as the plotting symbol ------- */
/* Each plot carries a reference line at residual = 0, drawn in black */
/* with line type 3.                                                  */
proc gplot data=out;
    plot residuals * predicted = star_sym
         / vref=0 cvref="black" lvref=3;

    plot residuals * budget = star_sym
         / vref=0 cvref="black" lvref=3;

    /* This last plot is not very useful, but it shows the result of
       plotting residuals vs. a dummy variable. */
    plot residuals * sum_dum = star_sym
         / vref=0 cvref="black" lvref=3;
run;
quit;