# Fit two local-regression (loess) smoothers of wage on age; the two fits
# differ only in the span passed to loess().
m1 <- loess(formula = wage ~ age, data = datos, span = 0.2)
m2 <- loess(formula = wage ~ age, data = datos, span = 0.5)
# Print the summary of the span = 0.2 fit.
summary(m1)
## Call:
## loess(formula = wage ~ age, data = datos, span = 0.2)
##
## Number of Observations: 2921
## Equivalent Number of Parameters: 16.27
## Residual Standard Error: 30.18
## Trace of smoother matrix: 17.99 (exact)
##
## Control settings:
## span : 0.2
## degree : 2
## family : gaussian
## surface : interpolate cell = 0.2
## normalize: TRUE
## parametric: FALSE
## drop.square: FALSE
# Grid of ages, in steps of 1, spanning the observed range of datos$age.
age_grid <- seq(from = min(datos$age), to = max(datos$age), by = 1)
# With loess, se = TRUE must be used, because interval = "" does not work.
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
yp1 <- predict(m1, newdata = data.frame(age = age_grid), se = TRUE)
yp2 <- predict(m2, newdata = data.frame(age = age_grid), se = TRUE)
# Scatter plot of the raw data.
plot(
  datos$age, datos$wage,
  cex = 0.5, col = "darkgrey",
  ylab = "wage (x 1000 $)", xlab = "age"
)
# Fitted curves: blue for m1 (span 0.2), red for m2 (span 0.5).
lines(age_grid, yp1$fit, col = "blue", lwd = 2)
lines(age_grid, yp2$fit, col = "red", lwd = 2)
# Legend anchored at user coordinates (60, 200).
legend(
  60, 200,
  legend = c("s = 0.2", "s = 0.5"),
  col = c("blue", "red"), lty = 1, lwd = 2
)
Recordemos que el intervalo de confianza para el valor medio de la respuesta, $x_p^T \beta$, viene dado por:
\[ x_p^T \hat \beta - t_{\alpha/2} \hat s_R\sqrt{x_p^T (X^TX)^{-1} x_p} \leq (x_p^T \beta) \leq x_p^T \hat \beta + t_{\alpha/2} \hat s_R\sqrt{x_p^T (X^TX)^{-1} x_p} \]
# Significance level for the band around the m1 fit.
alfa <- 0.05
# Lower and upper bounds: fit plus a quantile times the standard error of the
# fit. The normal quantile is used instead of the Student's t quantile.
yp11 <- with(yp1, fit + qnorm(alfa / 2) * se.fit)
yp12 <- with(yp1, fit + qnorm(1 - alfa / 2) * se.fit)
# Raw data as background.
with(
  datos,
  plot(
    age, wage,
    cex = 0.5, col = "darkgrey",
    ylab = "wage (x 1000 $)", xlab = "age"
  )
)
# Fitted curve (solid) and its two bounds (dotted), all in blue.
lines(age_grid, yp1$fit, col = "blue", lwd = 2)
matlines(age_grid, cbind(yp11, yp12), col = "blue", lty = 3)
\[ x_p^T \hat \beta - t_{\alpha/2} \hat s_R\sqrt{1 + x_p^T (X^TX)^{-1} x_p} \leq y_p \leq x_p^T \hat \beta + t_{\alpha/2} \hat s_R\sqrt{1 + x_p^T (X^TX)^{-1} x_p} \]
# Residual standard error stored in the m1 fit.
sR <- m1$s
alfa <- 0.05
# Standard deviation used for the wider band: the residual variance is added
# to the squared standard error of the fit.
pred_sd <- sqrt(sR^2 + yp1$se.fit^2)
# Lower and upper bounds, again with normal quantiles.
yp13 <- yp1$fit + qnorm(alfa / 2) * pred_sd
yp14 <- yp1$fit + qnorm(1 - alfa / 2) * pred_sd
# Raw data as background.
with(
  datos,
  plot(
    age, wage,
    cex = 0.5, col = "darkgrey",
    ylab = "wage (x 1000 $)", xlab = "age"
  )
)
# Fitted curve with the yp11/yp12 band in blue (dotted) ...
lines(age_grid, yp1$fit, col = "blue", lwd = 2)
matlines(age_grid, cbind(yp11, yp12), col = "blue", lty = 3)
# ... and the yp13/yp14 band in red (dotted).
matlines(age_grid, cbind(yp13, yp14), col = "red", lty = 3)