# ------------------
# Data Preparation
# ------------------

# Read datasets.
# Download the data from http://www.saedsayad.com/datasets/BikeRental.zip
# Both CSV files are expected in the current working directory.
train <- read.csv("bike_rental_train.csv")
test <- read.csv("bike_rental_test.csv")

# Rows and columns
dim(train)
dim(test)

# Column names
colnames(train)
colnames(test)

# First rows of each dataset
head(train)
head(test)

# Scatter-plot matrices of the predictors used below against the target
pairs(
  ~ temp + humidity + windspeed + bike_rent_count,
  data = train,
  main = "Scatterplot - train",
  col = "darkgreen"
)
pairs(
  ~ temp + humidity + windspeed + bike_rent_count,
  data = test,
  main = "Scatterplot - test",
  col = "brown"
)

# ----------------------------------
# K Nearest Neighbors - Regression
# ----------------------------------
library(kknn)

# Train: kknn() takes the training and the test set in one call and scores
# `test` immediately. k = 5 neighbours, Minkowski distance parameter 2;
# scale = FALSE leaves the predictors unstandardized.
model.KNN <- kknn(
  bike_rent_count ~ .,
  train,
  test,
  k = 5,
  distance = 2,
  scale = FALSE
)
summary(model.KNN)

# Test: the predictions for `test` are already stored in the fitted model,
# so read them back instead of passing `newdata` to predict() again.
pred.KNN <- fitted(model.KNN)
err.KNN <- test$bike_rent_count - pred.KNN
rmse.KNN <- sqrt(mean(err.KNN^2))
# Report the test RMSE explicitly; otherwise it is computed but never shown.
print(rmse.KNN)

# Errors histogram
hist(
  err.KNN,
  main = "bike_rent_count",
  sub = "(Actual-Predicted)",
  xlab = "Error",
  breaks = 10,
  col = "darkred"
)

# Residual plot against temperature, with a horizontal reference line at 0
plot(
  test$temp,
  err.KNN,
  ylab = "Residuals",
  xlab = "Temperature",
  main = "Residual Plot (test)"
)
abline(h = 0)

# Q-Q plot of the standardized (centred and scaled) residuals
stdres.KNN <- scale(err.KNN)
qqnorm(
  stdres.KNN,
  ylab = "Standardized Residuals",
  xlab = "Normal Scores",
  main = "QQ Plot"
)
qqline(stdres.KNN)