---
title: "Regressão logística"
author: Prof. Walmes M. Zeviani & Prof. Eduardo V. Ferreira
date: 2017-11-14
#bibliography: ../config/Refs.bib
#csl: ../config/ABNT-UFPR-2011-Mendeley.csl
---
```{r, include = FALSE}
source("../config/setup.R")
opts_chunk$set(cache = FALSE,
               message = FALSE,
               warning = FALSE)
```
# Dichotomous response
* <https://datascienceplus.com/perform-logistic-regression-in-r/>;
* <http://dataaspirant.com/2017/03/14/multinomial-logistic-regression-model-works-machine-learning/>;
* <https://machinelearningmastery.com/linear-classification-in-r/>;
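
The model fitted below assumes that, given the predictor vector $x$, the binary response $Y$ is Bernoulli with a success probability tied to a linear predictor through the logit link:
$$
\Pr(Y = 1 \mid x) = \frac{1}{1 + \exp(-x^\top \beta)},
\qquad
\log \frac{\Pr(Y = 1 \mid x)}{1 - \Pr(Y = 1 \mid x)} = x^\top \beta.
$$
The coefficients $\beta$ are estimated by maximum likelihood, which is what `glm(..., family = binomial)` does.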
```{r}
library(lattice)
library(latticeExtra)

# Load the data ("?" marks missing values; keep character columns as factors).
url <- paste0("http://archive.ics.uci.edu/ml/machine-learning-databases",
              "/credit-screening/crx.data")
cre <- read.csv(url, header = FALSE, na.strings = "?",
                stringsAsFactors = TRUE)
names(cre)[16] <- "y"
summary(cre)

# Plot the response against the numeric predictors.
n <- sapply(cre[, -16], is.numeric)
f <- sprintf("y ~ %s",
             paste(names(cre)[1:15][n],
                   collapse = " + "))
xyplot(as.formula(f),
       outer = TRUE,
       data = cre,
       as.table = TRUE,
       jitter.y = TRUE,
       amount = 0.025,
       scales = list(x = list(relation = "free", log = FALSE))) +
    latticeExtra::layer(panel.smoother(x, y, method = lm))

# Plot the response against the categorical predictors.
v <- names(cre)[1:15][sapply(cre[, -16], is.factor)]
length(v)
keep <- logical(length(v))
names(keep) <- v
par(mfrow = c(3, 3))
for (i in v) {
    xt <- xtabs(as.formula(sprintf("~y + %s", i)), data = cre)
    # Keep the variable only if every cell of the table holds > 10% of the cases.
    if (min(prop.table(xt)) > 0.1) {
        keep[i] <- TRUE
    }
    mosaicplot(xt, main = NULL)
}
layout(1)

# Keep only the variables without (quasi-)separation.
cre <- subset(cre,
              select = setdiff(names(cre),
                               names(which(!keep))))
```
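
The `min(prop.table(xt)) > 0.1` rule above is a crude screen against (quasi-)separation: when a level of a categorical predictor occurs almost exclusively with one class, the maximum-likelihood estimates of the logistic model diverge. A minimal sketch of the problem on toy data (the data and names are illustrative only):
```{r}
# Toy data with complete separation: level "b" only occurs with y = 1.
toy <- data.frame(y = c(0, 0, 0, 1, 1, 1),
                  x = c("a", "a", "a", "b", "b", "b"))
xt <- xtabs(~y + x, data = toy)
prop.table(xt)
# Two cells are empty, so the screening rule would discard x.
min(prop.table(xt)) > 0.1
# Fitting anyway yields huge, unstable coefficients (R warns that fitted
# probabilities numerically 0 or 1 occurred).
coef(glm(y ~ x, data = toy, family = binomial))
```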
```{r}
# Complete cases.
cc <- complete.cases(cre)
table(cc)

# Drop the incomplete cases.
cre <- cre[cc, ]

# Fit the full model.
m0 <- glm(y ~ ., data = cre, family = binomial)
summary(m0)

# Stepwise variable selection using the BIC penalty.
m1 <- step(m0, k = log(nrow(cre)))
summary(m1)

# Predicted probabilities.
yp <- predict(m1, type = "response")

# Classification (confusion) table at the 0.5 cutoff.
tb <- table(round(yp), cre$y)
tb

# Proportion of correct classifications.
sum(diag(tb))/sum(tb)
```
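
To make explicit what `predict(..., type = "response")` returns, the same probabilities can be recovered by applying the inverse logit to the linear predictor by hand:
```{r}
# Linear predictor (log-odds scale) and hand-applied inverse logit.
eta <- predict(m1, type = "link")
p_manual <- 1/(1 + exp(-eta))
# Matches the probabilities used in the classification table above.
all.equal(p_manual, predict(m1, type = "response"))
```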
* <https://topepo.github.io/caret/available-models.html>
```{r, eval = FALSE}
library(caret)

# Create the training and test partitions.
set.seed(789)
intrain <- createDataPartition(y = cre$y,
                               p = 0.75,
                               list = FALSE)
cre_train <- cre[intrain, ]
cre_test <- cre[-intrain, ]
list(train = nrow(cre_train),
     test = nrow(cre_test),
     ratio = nrow(cre_train)/nrow(cre))

# Set up repeated 10-fold cross-validation.
trctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# Boosted logistic regression ("regLogistic" and "plr" are alternatives).
set.seed(159)
fit <- train(y ~ .,
             data = cre_train,
             method = c("LogitBoost", "regLogistic", "plr")[1],
             trControl = trctrl)
fit
fit$finalModel

# Prediction and confusion matrix.
yp <- predict(fit, newdata = cre_test)
confusionMatrix(yp, cre_test$y)
```
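
If the chunk above is run, the resampling results and the selected tuning value can be read off the `train` object (for `"LogitBoost"` the tuning parameter is the number of boosting iterations, `nIter`):
```{r, eval = FALSE}
# Resampled accuracy for each candidate value of the tuning parameter.
fit$results
# Tuning value selected by train().
fit$bestTune
# Summary of the resampled performance of the selected model.
getTrainPerf(fit)
```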
# Polytomous response
## Using `VGAM`
* <https://machinelearningmastery.com/linear-classification-in-r/>;
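
`vglm()` with `family = multinomial` fits the multinomial (softmax) logit model; by default the last level of the response is the reference category, so for $K$ classes there are $K - 1$ sets of coefficients:
$$
\Pr(Y = j \mid x) = \frac{\exp(x^\top \beta_j)}{\sum_{k = 1}^{K} \exp(x^\top \beta_k)},
\qquad \beta_K = 0.
$$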
```{r}
# Load the package.
library(VGAM)

# Fit the multinomial logit model.
fit <- vglm(Species ~ ., family = multinomial, data = iris)

# Summary of the fit.
summary(fit)

# Predicted class probabilities and predicted classes.
prob <- predict(fit, newdata = iris, type = "response")
pred <- levels(iris$Species)[apply(prob, MARGIN = 1, FUN = which.max)]
pred <- factor(pred, levels = levels(iris$Species))

# Accuracy (confusionMatrix() comes from caret).
caret::confusionMatrix(pred, iris$Species)
```
## Using `caret`
```{r}
library(caret)

# Create the training and test partitions.
set.seed(987)
intrain <- createDataPartition(y = iris$Species,
                               p = 0.75,
                               list = FALSE)
data_train <- iris[intrain, ]
data_test <- iris[-intrain, ]

# Set up repeated 10-fold cross-validation.
trctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# Penalized multinomial regression (uses nnet::multinom()).
set.seed(159)
fit <- train(Species ~ .,
             data = data_train,
             method = "multinom",
             trControl = trctrl)
fit
fit$finalModel

# Prediction and confusion matrix.
yp <- predict(fit, newdata = data_test)
confusionMatrix(yp, data_test$Species)
```
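
Class probabilities, rather than hard class labels, can also be obtained from the fitted `train` object:
```{r}
# Predicted class probabilities for the test set (one column per species).
head(predict(fit, newdata = data_test, type = "prob"))
```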
---
title: "Classificador de Bayes ingênuo"
author: Prof. Walmes M. Zeviani & Prof. Eduardo V. Ferreira
date: 2017-10-26
#bibliography: ../config/Refs.bib
#csl: ../config/ABNT-UFPR-2011-Mendeley.csl
---
```{r, include = FALSE}
source("../config/setup.R")
opts_chunk$set(cache = FALSE,
               message = FALSE,
               warning = FALSE)
```
* <https://machinelearningmastery.com/linear-classification-in-r/>
* <https://topepo.github.io/caret/available-models.html>
* <http://archive.ics.uci.edu/ml/datasets.html>
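
As a starting point, a minimal naive Bayes fit on `iris` could look like the sketch below; it assumes the `e1071` package (other implementations, such as `klaR` or `naivebayes`, follow the same pattern):
```{r, eval = FALSE}
library(e1071)
library(caret)

# Fit the naive Bayes classifier: predictors are treated as conditionally
# independent given the class (Gaussian densities for numeric predictors).
fit <- naiveBayes(Species ~ ., data = iris)
fit

# Predicted classes and confusion matrix on the training data.
yp <- predict(fit, newdata = iris)
confusionMatrix(yp, iris$Species)
```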