Map > Problem Definition > Data Preparation > Data Exploration > Modeling > Evaluation > Classification |
Evaluation - Classification |
I. Data Preparation |
1- Load libraries |
library(data.table) library(formattable) library(plotrix) library(dplyr) library(Rtsne) library(MASS) library(xgboost) library(factoextra) library(caTools) library(pROC) library(caret) library(gains) library(lift) |
2- Read Expressions file |
# Load the raw expression table; its first column supplies the row labels.
df <- read.csv("GSE74763_rawlog_expr.csv")
df2 <- df[, -1]
rownames(df2) <- df[, 1]

# Transpose so the original rows become columns, then label the result with
# the original column names (as rows) and row names (as columns) in one step.
expr <- transpose(df2)
dimnames(expr) <- list(colnames(df2), rownames(df2))
dim(expr)
3- Read Samples file |
# Load the sample annotation table and show its column names and size.
targets <- read.csv("GSE74763_rawlog_targets.csv")
names(targets)
dim(targets)
4- Merge Expressions with Samples |
# Place the expression columns and the sample annotation columns side by side.
# Rows are paired purely by position; column names are kept exactly as they are.
data <- data.frame(expr, targets, check.names = FALSE)
names(data)
dim(data)
II. Splitting Data into Training and Test Sets |
# Fix the RNG seed so the train/test partition is reproducible.
set.seed(101)
d1 <- data

# Numeric code for each sample's class, numbered in order of first appearance.
# Built with base R because as.fumeric() is not provided by any of the packages
# loaded at the top of this script (it appears to come from rafalib -- confirm),
# so the original call stopped the script here.
groups <- as.numeric(factor(data$target, levels = unique(data$target)))

# Logical mask from caTools::sample.split(): TRUE marks the 80% training rows.
# NOTE: the name `sample` shadows base::sample(); kept so that any later code
# referring to this mask continues to work.
sample <- sample.split(d1$target, SplitRatio = 0.8)

# Index with the mask directly. subset(d1, sample == TRUE) would look up
# `sample` inside d1 first, so a column of that name in the merged data would
# silently be used in place of the split mask.
train <- d1[sample, , drop = FALSE]
test <- d1[!sample, , drop = FALSE]
dim(train)
dim(test)
|
III. Logistic Regression |
# Train: logistic regression of target on three probes, fitted on the
# training set with up to 100 fitting iterations.
model <- glm(
  target ~ P000833 + P007414 + P002449,
  data = train,
  family = binomial(logit),
  maxit = 100
)
print(model)

# Test: predicted probabilities for the held-out rows, stored as a
# one-column data frame whose column is named `pb`.
pb <- data.frame(pb = predict(model, newdata = test, type = "response"))
|
IV. Confusion Matrix |
# Class labels used throughout this section; "1" is the positive class.
# Assumes test$target is coded 0/1, as the original positive = "1" implies.
class_levels <- c("0", "1")

# Predicted class: "1" when the predicted probability exceeds 0.5, else "0".
pc <- ifelse(pb$pb > 0.5, "1", "0")

# Pin both factors to the same two levels so the cross-table is always 2 x 2.
# Without this, a test set where every prediction lands on one side of the
# cutoff produces a one-row table and confusionMatrix() stops with an error.
predicted <- factor(pc, levels = class_levels)
actual <- factor(test$target, levels = class_levels)

# Count of predictions per class (summary() of the raw character vector only
# reported its length and mode).
summary(predicted)

xtab <- table(predicted, actual)
caret::confusionMatrix(xtab, positive = "1")
|
V. ROC Chart |
# Recompute the test-set probabilities so this section can be run on its own
# once `model` and `test` exist.
pb <- predict(model, test, type = "response")
pb <- as.data.frame(pb)
labels <- test$target
scores <- pb$pb

# Build the ROC object once and reuse it for both the plot and the AUC; the
# original built it twice, repeating the work and pROC's diagnostic messages.
# direction = "<" tells pROC that controls have lower scores than cases.
roc_curve <- roc(labels, scores, direction = "<")
plot(roc_curve, col = "blue", lwd = 3, main = "ROC Chart")
auc(roc_curve)
|
|
VI. Gain and Lift Charts |
# Fresh test-set probabilities, as a one-column data frame named `pb`.
pb <- data.frame(pb = predict(model, newdata = test, type = "response"))
labels <- test$target
scores <- pb$pb

# Gains table over 10 groups: shown first, then plotted.
gains_table <- gains(labels, scores, groups = 10)
gains_table
plot(gains_table)

# Cumulative lift chart over 10 buckets.
plotLift(scores, labels, cumulative = TRUE, n.buckets = 10)
|
|
|
VII. Bioada SmartArray |
Watch this video to learn how to evaluate classification models significantly faster and more easily with Bioada SmartArray. |