Split a dataset into test and training sets.

* ------------------------------------------------------------------------------;
* SAS Code                                                                      ;
*-------------------------------------------------------------------------------;

* Read the response y and the predictors x1-x9, starting at line 15 of the file;
data propval;
    infile "c:/datasets/propval.txt" firstobs=15;
    input y x1-x9;
run;

* Draw a simple random sample (method=srs) of half the rows with a fixed seed.
  OUTALL keeps every input row in the output dataset and adds the Selected flag;
proc surveyselect data=propval method=srs seed=43543 outall
                  samprate=0.5 out=subsets;
run;

* Values of Selected variable: 1 means selected for training set, 0 means test set;
proc print data=subsets;
    title "Subsets Dataset";
run;

* Training set: rows flagged Selected=1 (subsetting IF keeps only matching rows);
data training;
    set subsets;
    if Selected=1;
run;

proc print data=training;
run;

* Test set: rows flagged Selected=0;
data test;
    set subsets;
    if Selected=0;
run;

proc print data=test;
run;

#-------------------------------------------------------------------------------
# R code
#-------------------------------------------------------------------------------

# Read and print data frame.
# skip=13 discards the first 13 lines; with header=TRUE the next line
# supplies the column names and the data rows follow it.
propval = read.table("c:/datasets/propval.txt", header=TRUE, skip=13)
cat("propval data frame:\n")
print(propval)

# Set seed for random number generator so everyone
# using this code gets the same results.
set.seed(34839)

# Select half of the rows to derive model (training),
# select the other half for validation (test).
# %/% is integer division, so an odd row count rounds the training size down.
s = sample(nrow(propval), size=nrow(propval) %/% 2)
print(s)

# Training rows appear in the order they were sampled.
training = propval[s,]
cat("training data frame:\n")
print(training)

# Build the complement explicitly instead of using propval[-s,]:
# when s is empty, -s is also empty and would select NO rows rather than
# all rows. setdiff keeps the remaining rows in their original order.
test_rows = setdiff(seq_len(nrow(propval)), s)
test = propval[test_rows,]
cat("test data frame:\n")
print(test)