# Regression walkthrough:
#   1. simple vs. multiple linear regression on the Advertising data,
#   2. subset selection (exhaustive, forward, backward) on ISLR's Hitters data,
#   3. model variants: interaction term, polynomial term, no intercept.

# Packages are attached up front, in the order the script first needs them.
library(tidyverse)
library(corrplot)
library(ISLR)
library(leaps)

Advertising <- read.csv(
  "~/Nextcloud/2022 Análisis de datos para ciencias/Advertising.csv"
)

# What happens with each variable on its own? ----

ggplot(Advertising, aes(x = newspaper, y = sales)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

reg <- lm(sales ~ newspaper, data = Advertising)
summary(reg)

# And if we put everything together? ----

# NOTE(review): `.` uses every column of Advertising other than `sales` as a
# predictor. If the CSV carries a row-index column it will be included in the
# model -- confirm against the file.
reg_mul <- lm(sales ~ ., data = Advertising)
summary(reg_mul)

# Residuals of the full model plotted against the newspaper predictor.
plot(Advertising$newspaper, reg_mul$residuals)

# Remember that correlations between predictors can cause problems.
# NOTE(review): cor() needs every column of Advertising to be numeric --
# confirm against the file.
corrplot(cor(Advertising), method = "number", type = "upper")

# Model selection ----

# Start from the package copy of the data so that re-running the script
# reports the same missing-value count every time.
Hitters <- ISLR::Hitters
sum(is.na(Hitters))            # number of missing values in the data set
Hitters <- na.omit(Hitters)    # drop the observations with missing values
sum(is.na(Hitters))

# Subset search with regsubsets' default method, allowing up to 19 variables.
reg_full <- regsubsets(Salary ~ ., data = Hitters, nvmax = 19)
srf <- summary(reg_full)

plot(reg_full, scale = "bic")
srf$bic                        # BIC for each model size

coef(reg_full, 6)
coef(reg_full, 10)

# Same search space, but with forward and backward stepwise selection.
reg_fwd <- regsubsets(
  Salary ~ .,
  data = Hitters, nvmax = 19, method = "forward"
)
reg_bwd <- regsubsets(
  Salary ~ .,
  data = Hitters, nvmax = 19, method = "backward"
)

summary(reg_fwd)
summary(reg_bwd)

# Adding other orders of predictors ----

# Add the TV x radio interaction term:
reg_mul <- lm(sales ~ TV + radio + newspaper + TV:radio, data = Advertising)
summary(reg_mul)

# Add several polynomial orders of one variable (degree-3 polynomial in TV):
reg_mul <- lm(sales ~ radio + newspaper + poly(TV, 3), data = Advertising)
summary(reg_mul)

# Force the intercept to be 0 (`- 1` removes it from the formula):
reg_mul <- lm(sales ~ TV + radio + newspaper - 1, data = Advertising)
summary(reg_mul)