#------------------
# Data Preparation
#------------------

# discretize() used below comes from the arules package.
library(arules)

# Read datasets
# Download the data from http://www.saedsayad.com/datasets/CreditData.zip
train <- read.csv("Credit_train.csv")
test <- read.csv("Credit_test.csv")

# Rows and cols
dim(train)
dim(test)

# Column names
colnames(train)
colnames(test)

# Show the first rows
head(train)
head(test)

#--------------------------------------------------
# Data Exploration - Univariate analysis - Binning
#--------------------------------------------------

# Variable to bin
x <- train$BUSAGE

# Graph layout: save the current graphics parameters so they can be restored
# at the end, then split the device into a 2 x 2 grid (one panel per method).
def.par <- par(no.readonly = TRUE)
layout(mat = rbind(1:2, 3:4))

# NOTE: the `categories` argument name is kept as originally written; newer
# arules releases prefer `breaks` and may warn about the old name.

# Equal width
# The method is spelled out instead of relying on the default, because the
# default is not "interval" in every arules release; without it this panel
# can silently become a second equal-frequency plot.
table(discretize(x, method = "interval", categories = 3))
hist(x, breaks = 20, main = "Equal Width", xlab = "BUSAGE")
abline(
  v = discretize(x, method = "interval", categories = 3, onlycuts = TRUE),
  col = "red"
)

# Equal frequency
table(discretize(x, method = "frequency", categories = 3))
hist(x, breaks = 20, main = "Equal Frequency", xlab = "BUSAGE")
abline(
  v = discretize(x, method = "frequency", categories = 3, onlycuts = TRUE),
  col = "red"
)

# User-specified cut points: a single boundary at 100 ("low" / "high")
table(
  discretize(
    x,
    method = "fixed",
    categories = c(-Inf, 100, Inf),
    labels = c("low", "high")
  )
)
hist(x, breaks = 20, main = "Fixed", xlab = "BUSAGE")
abline(
  v = discretize(
    x,
    method = "fixed",
    categories = c(-Inf, 100, Inf),
    onlycuts = TRUE
  ),
  col = "red"
)

# Log: bin by the integer part of log(1 + x) and plot the bin counts
y <- as.factor(floor(log1p(x)))
plot(y, main = "Log Normal", xlab = "BUSAGE")

# Reset graph: restore the graphics parameters saved above
par(def.par)