Entrenando dos modelos para predecir ingresos
El objetivo de esta práctica es que puedan entrenar dos modelos con caret
y compararlos. Para ello, volveremos al problema de regresión original: queremos construir un modelo que permita predecir los ingresos de la ocupación principal (\(p21\)) de los ocupados en la semana de referencia encuestados en el II Trimestre de 2015.
Para ello, deberán entrenar una regresión lineal y un modelo de random forest y comparar sus performances:
Importar las librerías a utilizar
library(caret)
library(tidyverse)
library(rpart)
Importar y setear los datos correctamente
load('../data/EPH_2015_II.RData')
data$pp03i<-factor(data$pp03i, labels=c('1-SI', '2-No', '9-NS'))
data$intensi<-factor(data$intensi, labels=c('1-Sub_dem', '2-SO_no_dem',
'3-Ocup.pleno', '4-Sobreoc',
'5-No trabajo', '9-NS'))
data$pp07a<-factor(data$pp07a, labels=c('0-NC',
'1-Menos de un mes',
'2-1 a 3 meses',
'3-3 a 6 meses',
'4-6 a 12 meses',
'5-12 a 60 meses',
'6-Más de 60 meses',
'9-NS'))
df_imp <- data %>%
filter(imp_inglab1==1) %>%
select(-imp_inglab1)
df_train <- data %>%
filter(imp_inglab1==0) %>%
select(-imp_inglab1) %>%
mutate(p21 = case_when(
p21==0 ~ 100,
TRUE ~ p21))
Hacer un train-test split
set.seed(6615)
tr_index <- createDataPartition(y=df_train$p21,
p=0.8,
list=FALSE)
train <- df_train[tr_index,]
test <- df_train[-tr_index,]
Entrenar la regresión lineal
lm_fit <- train(p21~., method='lm',
data=train,
trControl=trainControl(method='none'))
Setear la partición para el tuneo del reandom forest
set.seed(7412)
cv_index_final <- createFolds(y = train$p21,
k=5,
list=FALSE,
returnTrain=TRUE)
fitControl <- trainControl(
index=cv_index_final,
method="cv",
number=5,
allowParallel=FALSE)
Generar la grilla de hiperparámetros
grid_rf <- expand.grid(mtry=c(21),
splitrule='variance',
min.node.size=c(1))
Entrenar el modelo
t0<-proc.time()
rf_fit_regresion <- train(p21 ~ .,
data = train,
method = "ranger",
trControl = fitControl,
tuneGrid = grid_rf,
verbose = FALSE)
Realizar las predicciones en el test-set y comparar
eval_regression <- function(model, test_set, y){
preds <- predict(model, test_set)
metrics <- postResample(preds, y)
return(metrics)
}
models <- list(lm = lm_fit,
rf = rf_fit_regresion)
model_metrics <- models %>%
map(eval_regression, test, test$p21)
prediction from a rank-deficient fit may be misleadingForest grown in ranger version <0.11.5, converting ...Error in predict.ranger.forest(forest, data, predict.all, num.trees, type, :
Error: One or more independent variables not found in data.
Comparar en un scatter_plot las predicciones de cada modelo con los valores reales
r
r model_preds <- models %>% map(predict, test) %>% as_tibble() %>% mutate(y = test$p21)
prediction from a rank-deficient fit may be misleading
r
r ggplot(model_preds) + geom_point(aes(x=lm, y=y, color=‘Modelo Lineal’)) + geom_point(aes(x=rf, y=y, color=‘Random Forest’)) + labs(x = , y = , color=‘Modelos’)

LS0tCnRpdGxlOiAiUHLDoWN0aWNhIGluZGVwZW5kaWVudGUgLSBTT0xVQ0lPTiIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIyMgRW50cmVuYW5kbyBkb3MgbW9kZWxvcyBwYXJhIHByZWRlY2lyIGluZ3Jlc29zCgpFbCBvYmpldGl2byBkZSBlc3RhIHByw6FjdGljYSBlcyBxdWUgcHVlZGFuIGVudHJlbmFyIGRvcyBtb2RlbG9zIGNvbiBgY2FyZXRgIHkgY29tcGFyYXJsb3MuIFBhcmEgZWxsbywgdm9sdmVyZW1vcyBhbCBwcm9ibGVtYSBkZSByZWdyZXNpw7NuIG9yaWdpbmFsOiBxdWVyZW1vcyBjb25zdHJ1aXIgdW4gbW9kZWxvIHF1ZSBwZXJtaXRhIHByZWRlY2lyIGxvcyBpbmdyZXNvcyBkZSBsYSBvY3VwYWNpw7NuIHByaW5jaXBhbCAoJHAyMSQpIGRlIGxvcyBvY3VwYWRvcyBlbiBsYSBzZW1hbmEgZGUgcmVmZXJlbmNpYSBlbmN1ZXN0YWRvcyBlbiBlbCBJSSBUcmltZXN0cmUgZGUgMjAxNS4KClBhcmEgZWxsbywgZGViZXLDoW4gZW50cmVuYXIgdW5hIHJlZ3Jlc2nDs24gbGluZWFsIHkgdW4gbW9kZWxvIGRlIHJhbmRvbSBmb3Jlc3QgeSBjb21wYXJhciBzdXMgcGVyZm9ybWFuY2VzOgoKCiMjIyBJbXBvcnRhciBsYXMgbGlicmVyw61hcyBhIHV0aWxpemFyCgpgYGB7ciwgbWVzc2FnZT1GQUxTRX0KbGlicmFyeShjYXJldCkKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkocnBhcnQpCmBgYAoKCiMjIyBJbXBvcnRhciB5IHNldGVhciBsb3MgZGF0b3MgY29ycmVjdGFtZW50ZQoKYGBge3J9Cgpsb2FkKCcuLi9kYXRhL0VQSF8yMDE1X0lJLlJEYXRhJykKCmRhdGEkcHAwM2k8LWZhY3RvcihkYXRhJHBwMDNpLCBsYWJlbHM9YygnMS1TSScsICcyLU5vJywgJzktTlMnKSkKCgoKZGF0YSRpbnRlbnNpPC1mYWN0b3IoZGF0YSRpbnRlbnNpLCBsYWJlbHM9YygnMS1TdWJfZGVtJywgJzItU09fbm9fZGVtJywgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJzMtT2N1cC5wbGVubycsICc0LVNvYnJlb2MnLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICc1LU5vIHRyYWJham8nLCAnOS1OUycpKQoKZGF0YSRwcDA3YTwtZmFjdG9yKGRhdGEkcHAwN2EsIGxhYmVscz1jKCcwLU5DJywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICcxLU1lbm9zIGRlIHVuIG1lcycsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnMi0xIGEgMyBtZXNlcycsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnMy0zIGEgNiBtZXNlcycsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnNC02IGEgMTIgbWVzZXMnLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJzUtMTIgYSA2MCBtZXNlcycsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnNi1Nw6FzIGRlIDYwIG1lc2VzJywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICc5LU5TJykpCgpkZl9pbXAgPC0gZGF0YSAlPiUKICAgICAgICBmaWx0ZXIoaW1wX2luZ2xhYjE9PTEpICU+JQogICAgICAgIHNlbGVjdCgtaW1wX2luZ2xhYjEpCgpkZl90cmFpbiA8LSBkYXRhICU+JQogICAgICAgIGZpbHRlcihpbXBfaW5nbGFiMT09MCkgJT4lCiAgICAgICAgc2VsZWN0KC1pbXBfaW5nbGFiMSkgJT4lCiAgICAgICAgbXV0YXRlKHAyMSA9IGNhc2Vfd2hlbigKICAgICAgICAgICAgICAgICAgICAgICAgcDIxPT0wIH4gMTAwLAogICAgICAgICAgICAgICAgICAgICAgICBUUlVFIH4gcDIxKSkKCgpgYGAKCgoKIyMjIEhhY2VyIHVuIHRyYWluLXRlc3Qgc3BsaXQKCmBgYHtyfQpzZXQuc2VlZCg2NjE1KQp0cl9pbmRleCA8LSBjcmVhdGVEYXRhUGFydGl0aW9uKHk9ZGZfdHJhaW4kcDIxLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHA9MC44LAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGxpc3Q9RkFMU0UpCgp0cmFpbiA8LSBkZl90cmFpblt0cl9pbmRleCxdCnRlc3QgPC0gZGZfdHJhaW5bLXRyX2luZGV4LF0KCmBgYAoKCiMjIEVudHJlbmFyIGxhIHJlZ3Jlc2nDs24gbGluZWFsCgpgYGB7cn0KbG1fZml0IDwtIHRyYWluKHAyMX4uLCBtZXRob2Q9J2xtJywKICAgICAgICAgICAgICAgIGRhdGE9dHJhaW4sCiAgICAgICAgICAgICAgICB0ckNvbnRyb2w9dHJhaW5Db250cm9sKG1ldGhvZD0nbm9uZScpKQpgYGAKCgojIyBTZXRlYXIgbGEgcGFydGljacOzbiBwYXJhIGVsIHR1bmVvIGRlbCByZWFuZG9tIGZvcmVzdAoKYGBge3J9CgpzZXQuc2VlZCg3NDEyKQpjdl9pbmRleF9maW5hbCA8LSBjcmVhdGVGb2xkcyh5ID0gdHJhaW4kcDIxLAogICAgICAgICAgICAgICAgICAgICAgICBrPTUsCiAgICAgICAgICAgICAgICAgICAgICAgIGxpc3Q9RkFMU0UsCiAgICAgICAgICAgICAgICAgICAgICAgIHJldHVyblRyYWluPVRSVUUpCgpmaXRDb250cm9sIDwtIHRyYWluQ29udHJvbCgKICAgICAgICBpbmRleD1jdl9pbmRleF9maW5hbCwgCiAgICAgICAgbWV0aG9kPSJjdiIsCiAgICAgICAgbnVtYmVyPTUsCiAgICAgICAgYWxsb3dQYXJhbGxlbD1GQUxTRSkKCmBgYAoKCiMjIyBHZW5lcmFyIGxhIGdyaWxsYSBkZSBoaXBlcnBhcsOhbWV0cm9zCgpgYGB7cn0KZ3JpZF9yZiA8LSBleHBhbmQuZ3JpZChtdHJ5PWMoMjEpLAogICAgICAgICAgICAgICAgICAgIHNwbGl0cnVsZT0ndmFyaWFuY2UnLAogICAgICAgICAgICAgICAgICAgIG1pbi5ub2RlLnNpemU9YygxKSkKYGBgCgoKIyMjIEVudHJlbmFyIGVsIG1vZGVsbwpgYGB7cn0KdDA8LXByb2MudGltZSgpCnJmX2ZpdF9yZWdyZXNpb24gPC0gdHJhaW4ocDIxIH4gLiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgZGF0YSA9IHRyYWluLCAKICAgICAgICAgICAgICAgICAgICAgICAgICBtZXRob2QgPSAicmFuZ2VyIiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgdHJDb250cm9sID0gZml0Q29udHJvbCwKICAgICAgICAgICAgICAgICAgICAgICAgICB0dW5lR3JpZCA9IGdyaWRfcmYsCiAgICAgICAgICAgICAgICAgICAgICAgICAgdmVyYm9zZSA9IEZBTFNFKQpwcm9jLnRpbWUoKSAtIHQwCgpzYXZlUkRTKHJmX2ZpdF9yZWdyZXNpb24sICcuLi9tb2RlbHMvcmZfZml0X3JlZ3Jlc2lvbi5SRFMnKQoKcmZfZml0X3JlZ3Jlc2lvbgpgYGAKCmBgYHtyLCBpbmNsdWRlPUZBTFNFfQpyZl9maXRfcmVncmVzaW9uIDwtIHJlYWRSRFMoJy4uLy4uL2NsYXNlXzMvbW9kZWxzL3JmX2ZpdF9yZWdyZXNpb24uUkRTJykKYGBgCgoKIyMjIFJlYWxpemFyIGxhcyBwcmVkaWNjaW9uZXMgZW4gZWwgdGVzdC1zZXQgeSBjb21wYXJhcgoKYGBge3J9CmV2YWxfcmVncmVzc2lvbiA8LSBmdW5jdGlvbihtb2RlbCwgdGVzdF9zZXQsIHkpewogICAgICAgIHByZWRzIDwtIHByZWRpY3QobW9kZWwsIHRlc3Rfc2V0KQogICAgICAgIAogICAgICAgIG1ldHJpY3MgPC0gcG9zdFJlc2FtcGxlKHByZWRzLCB5KSAKICAgICAgICByZXR1cm4obWV0cmljcykKfQoKCm1vZGVscyA8LSBsaXN0KGxtID0gbG1fZml0LAogICAgICAgICAgICAgICByZiA9IHJmX2ZpdF9yZWdyZXNpb24pCgoKbW9kZWxfbWV0cmljcyA8LSBtb2RlbHMgJT4lCiAgICAgICAgbWFwKGV2YWxfcmVncmVzc2lvbiwgdGVzdCwgdGVzdCRwMjEpCgptb2RlbF9tZXRyaWNzCgpgYGAKCgojIyMgQ29tcGFyYXIgZW4gdW4gc2NhdHRlcl9wbG90IGxhcyBwcmVkaWNjaW9uZXMgZGUgY2FkYSBtb2RlbG8gY29uIGxvcyB2YWxvcmVzIHJlYWxlcwoKYGBge3J9Cm1vZGVsX3ByZWRzIDwtIG1vZGVscyAlPiUKICAgICAgICBtYXAocHJlZGljdCwgdGVzdCkgJT4lCiAgICAgICAgYXNfdGliYmxlKCkgJT4lCiAgICAgICAgbXV0YXRlKHkgPSB0ZXN0JHAyMSkKCmdncGxvdChtb2RlbF9wcmVkcykgKyAKICAgICAgICBnZW9tX3BvaW50KGFlcyh4PWxtLCB5PXksIGNvbG9yPSdNb2RlbG8gTGluZWFsJykpICsgCiAgICAgICAgZ2VvbV9wb2ludChhZXMoeD1yZiwgeT15LCBjb2xvcj0nUmFuZG9tIEZvcmVzdCcpKSArCiAgICAgICAgbGFicyh4ID0gIlByZWRpY2Npb25lcyIsCiAgICAgICAgICAgICB5ID0gIk9ic2VydmFkb3MiLAogICAgICAgICAgICAgY29sb3I9J01vZGVsb3MnKQoKYGBgCg==