## ----Aufgabe1-a, fig.show="hide"-----------------------------------------
library(faraway)
teengamb$sexF <- factor(teengamb$sex, levels = c(0,1), labels = c("m", "w"))
lm.gamb <- lm(gamble ~ income + sexF + status + verbal, data = teengamb)
## library zum plotten von Konfidenzellipsoiden laden
library(car)
# ?confidenceEllipse
confidenceEllipse(lm.gamb, which.coef = c("status", "verbal"), las = 1)
text(x = coef(lm.gamb)[c("status")] + 0.1, y = coef(lm.gamb)[c("verbal")] + 0.5, 
  labels = expression(paste("(",hat(beta)[3], ", ", hat(beta)[4], ")")), 
  pch = 19, cex = 1.2)

## ----Aufgabe1-b, fig.width=5.6, fig.height=5.6---------------------------
confidenceEllipse(lm.gamb, which.coef = c("status", "verbal"), las = 1)
text(x = coef(lm.gamb)[c("status")] + 0.1, y = coef(lm.gamb)[c("verbal")] + 0.5, 
  labels = expression(paste("(",hat(beta)[3], ", ", hat(beta)[4], ")")), 
  pch = 19, cex = 1.2)
## Ursprung
points(0,0, pch = 19, col = 1, cex = 1.2)
text(x = 0+0.05,y = 0-0.35, labels = "(0,0)")
## normale KI
abline(v = confint(lm.gamb)["status", ], lty = 2)
abline(h = confint(lm.gamb)["verbal", ], lty = 2)
## Bonferroni KI
abline(v=confint(lm.gamb,level=1-0.05/2)["status",],lty=2, col = 4)
abline(h=confint(lm.gamb,level=1-0.05/2)["verbal",],lty=2, col = 4)

## ----Aufgabe1-c----------------------------------------------------------
# Naive p-Werte
(naiveP <- summary(lm.gamb)$coefficients[c("status","verbal"),"Pr(>|t|)"])

# Bonferroni-adjustierte p-Werte
p.adjust(naiveP, method = "bonferroni")

# Adjustierung der p-Werte nach dem Maximum-Prinzip
library(multcomp)
glhtObj <- glht(lm.gamb,linfct=c("status = 0", "verbal  = 0"))
summary(glhtObj)$test$pvalues

## ----Aufgabe2-a----------------------------------------------------------
load("C:\\Exchange\\Work\\Teaching\\Limo\\LiMo15\\daten\\pm789.Rdata")
mod <- lm( log10(PM10_Prin) ~ log10(PM10_Joh) + UZ, data=pm789 )
summary(mod)  

## ----Aufgabe2-b, fig.width=5.6, fig.height=5.6---------------------------
library(effects)
modWd <- lm( log10(PM10_Prin) ~ log10(PM10_Joh) + log10(PM10_Joh):cosWd + UZ ,
             data=pm789)
summary(modWd)
plot(effect("log10(PM10_Joh)*cosWd",modWd))

## ----Aufgabe2-c, fig.width=7.6, fig.height=5.6---------------------------
plot(pm789$stwo,log10(pm789$PM10_Prin))
boxplot(log(PM10_Prin) ~ stwo, data = pm789, las = 1, pch=".")
abline(v=(0:7)*24+1)

## ----Aufgabe2-d, fig.width=7.6, fig.height=10.6--------------------------
# Polynomiale Schaetzungen
modP1 <- lm(log10(PM10_Prin) ~ stwo, data=pm789)
modP2 <- lm(log10(PM10_Prin) ~ poly(stwo, 2, raw=TRUE), data=pm789)
# statt lm(log10(PM10_Prin) ~ stwo + I(stwo^2), data=pm789)
modP3 <- lm(log10(PM10_Prin) ~ poly(stwo, 3, raw=TRUE), data=pm789)
modP4 <- lm(log10(PM10_Prin) ~ poly(stwo, 4, raw=TRUE), data=pm789)
modP5 <- lm(log10(PM10_Prin) ~ poly(stwo, 5, raw=TRUE), data=pm789)
modP6 <- lm(log10(PM10_Prin) ~ poly(stwo, 6, raw=TRUE), data=pm789)
modP7 <- lm(log10(PM10_Prin) ~ poly(stwo, 7, raw=TRUE), data=pm789)
modP8 <- lm(log10(PM10_Prin) ~ poly(stwo, 8, raw=TRUE), data=pm789)
modP <- list(modP1,modP2,modP3,modP4,modP5,modP6,modP7,modP8)

par(mfrow=c(4,2))
for(i in 1:8){
  boxplot(log10(PM10_Prin) ~ stwo, data = pm789, las = 1, pch=".",
          "log10(PM10)", main=paste("g =",i), xlab="Wochenstunde",
          border="gray50")
  lines(pm789$stwo[!is.na(pm789$PM10_Prin)], fitted(modP[[i]]), col=2, 
        lwd=2)
  abline(v=(0:7)*24+1)
}
# hierbei werden nur die geschaetzten Werte an den Datenpunkten eingezeichnet und
# miteinander verbunden, aber nicht der Verlauf der geschaetzten Funktion dargestellt

# Definition eines Gitters zur Darstellung des Verlaufs der geschaetzten Funktion
zeitgitter <- seq(min(pm789$stwo), max(pm789$stwo), length=1000)
# dazugehoerige Polynome
zeit1 <- cbind(rep(1, times=1000), poly(zeitgitter, 1, raw=TRUE))
zeit2 <- cbind(rep(1, times=1000), poly(zeitgitter, 2, raw=TRUE))
zeit3 <- cbind(rep(1, times=1000), poly(zeitgitter, 3, raw=TRUE))
zeit4 <- cbind(rep(1, times=1000), poly(zeitgitter, 4, raw=TRUE))
zeit5 <- cbind(rep(1, times=1000), poly(zeitgitter, 5, raw=TRUE))
zeit6 <- cbind(rep(1, times=1000), poly(zeitgitter, 6, raw=TRUE))
zeit7 <- cbind(rep(1, times=1000), poly(zeitgitter, 7, raw=TRUE))
zeit8 <- cbind(rep(1, times=1000), poly(zeitgitter, 8, raw=TRUE))
zeit <- list(zeit1,zeit2,zeit3,zeit4,zeit5,zeit6,zeit7,zeit8)

for(i in 1:8){
  boxplot(log10(PM10_Prin) ~ stwo, data = pm789, las = 1, pch=".",
          "log10(PM10)", main=paste("g =",i), xlab="Wochenstunde",
          border="gray50")
  lines(zeitgitter, (zeit[[i]] %*% modP[[i]]$coefficients), col=2, 
        lwd=2)
  abline(v=(0:7)*24+1)
}

## ----Aufgabe2-e, fig.width=5.3, fig.height=5.3---------------------------
regspline <- function(kovariable, K, g){
 
 # Bestimmung der Knoten
 knoten <- seq(min(kovariable), max(kovariable), length=K+2)

 # Anlegen der Design-Matrix
 X <- matrix(0, length(kovariable), K+g+1)
 
 # Polynome (in den ersten g+1 Spalten der Designmatrix)
 for(i in 0:g) {
   X[,i+1] <- kovariable^i
 }
 
 # abgeschnittene Potenzen (von der (g+2)-ten bis zur letzten Spalte der Designmatrix)
 for(i in 2:(K+1)){
   X[,i+g] <- (kovariable > knoten[i]) * 
     pmax((kovariable - knoten[i]), 0)^g
 }
 
 return(X)
 }

# Berechnung der modifizierten Variablen f?r verschiedene Polynom-Grade
regspline_g1_K3 <- regspline(pm789$stwo,g=1,K=3)
regspline_g2_K3 <- regspline(pm789$stwo,g=2,K=3)
regspline_g3_K3 <- regspline(pm789$stwo,g=3,K=3)
regspline_g4_K3 <- regspline(pm789$stwo,g=4,K=3)
regspline_g5_K3 <- regspline(pm789$stwo,g=5,K=3)
regspline_g6_K3 <- regspline(pm789$stwo,g=6,K=3)
regspline_g7_K3 <- regspline(pm789$stwo,g=7,K=3)
regspline_g8_K3 <- regspline(pm789$stwo,g=8,K=3)

# Anlegen eines Gitters, damit Funktion "glatt" gezeichnet werden kann
stwoGrid <- seq(min(pm789$stwo), max(pm789$stwo), length=1000)
regspline_g1_K3_grid <- regspline(stwoGrid,g=1,K=3)
regspline_g2_K3_grid <- regspline(stwoGrid,g=2,K=3)
regspline_g3_K3_grid <- regspline(stwoGrid,g=3,K=3)
regspline_g4_K3_grid <- regspline(stwoGrid,g=4,K=3)
regspline_g5_K3_grid <- regspline(stwoGrid,g=5,K=3)
regspline_g6_K3_grid <- regspline(stwoGrid,g=6,K=3)
regspline_g7_K3_grid <- regspline(stwoGrid,g=7,K=3)
regspline_g8_K3_grid <- regspline(stwoGrid,g=8,K=3)

# Schaetzung der Modelle
m_g1_K3 <-lm(log10(PM10_Prin)~regspline_g1_K3-1,data=pm789)
m_g2_K3 <-lm(log10(PM10_Prin)~regspline_g2_K3-1,data=pm789)
m_g3_K3 <-lm(log10(PM10_Prin)~regspline_g3_K3-1,data=pm789)
m_g4_K3 <-lm(log10(PM10_Prin)~regspline_g4_K3-1,data=pm789)
m_g5_K3 <-lm(log10(PM10_Prin)~regspline_g5_K3-1,data=pm789)
m_g6_K3 <-lm(log10(PM10_Prin)~regspline_g6_K3-1,data=pm789)
m_g7_K3 <-lm(log10(PM10_Prin)~regspline_g7_K3-1,data=pm789)
m_g8_K3 <-lm(log10(PM10_Prin)~regspline_g8_K3-1,data=pm789)
dim(model.matrix(m_g1_K3))
dim(pm789)

# Vorhergesagte Werte
ydach_g1_K3 <- regspline_g1_K3_grid %*% m_g1_K3$coefficients
ydach_g2_K3 <- regspline_g2_K3_grid %*% m_g2_K3$coefficients
ydach_g3_K3 <- regspline_g3_K3_grid %*% m_g3_K3$coefficients
ydach_g4_K3 <- regspline_g4_K3_grid %*% m_g4_K3$coefficients
ydach_g5_K3 <- regspline_g5_K3_grid %*% m_g5_K3$coefficients
ydach_g6_K3 <- regspline_g6_K3_grid %*% m_g6_K3$coefficients
ydach_g7_K3 <- regspline_g7_K3_grid %*% m_g7_K3$coefficients
ydach_g8_K3 <- regspline_g8_K3_grid %*% m_g8_K3$coefficients
ydach_g <- list( ydach_g1_K3, ydach_g2_K3, ydach_g3_K3, ydach_g4_K3,
                 ydach_g5_K3, ydach_g6_K3, ydach_g7_K3, ydach_g8_K3 )

# Interpretation der Koeffizienten
layout(1)
summary(m_g3_K3)
boxplot(log10(PM10_Prin) ~ stwo, data = pm789, las = 1, pch=".",
        "log10(PM10)", main="g = 3", xlab="Wochenstunde",
        border="gray50") 
lines(stwoGrid, regspline_g3_K3_grid[,1] * m_g3_K3$coefficients[1], col=2)
lines(stwoGrid, regspline_g3_K3_grid[,1:2] %*% m_g3_K3$coefficients[1:2], col=2)
lines(stwoGrid, regspline_g3_K3_grid[,1:3] %*% m_g3_K3$coefficients[1:3], col=2)
abline(v=seq(min(pm789$stwo), max(pm789$stwo), length=5)+0.5)
lines(stwoGrid, regspline_g3_K3_grid[,1:4] %*% m_g3_K3$coefficients[1:4], col=2)
lines(stwoGrid, regspline_g3_K3_grid[,1:5] %*% m_g3_K3$coefficients[1:5], col=2)
lines(stwoGrid, regspline_g3_K3_grid[,1:6] %*% m_g3_K3$coefficients[1:6], col=2)
lines(stwoGrid, regspline_g3_K3_grid[,1:7] %*% m_g3_K3$coefficients[1:7], col=2)
# beta: Koeffizenten fuer Potenzen
# gamma: Koeffizienten fuer trunkierte Potenzen 3. Grades

## ----Aufgabe2-e2, fig.width=7.6, fig.height=10.6-------------------------
par(mfrow=c(4,2))
for(i in 1:8){
  boxplot(log10(PM10_Prin) ~ stwo, data = pm789, las = 1, pch=".",
          "log10(PM10)", main=paste("g =", i), xlab="Wochenstunde",
          border="gray50")
  lines(stwoGrid, ydach_g[[i]], col=2,lwd=3)
  abline(v=(0:7)*24+1)
}

## ----Aufgabe2-e3, fig.width=7.6, fig.height=5.3--------------------------
# Berechnung der modifizierten Variablen fuer verschiedene Knotenzahlen
regspline_g3_K3 <- regspline(pm789$stwo,g=3,K=3)
regspline_g3_K5 <- regspline(pm789$stwo,g=3,K=5)
regspline_g3_K10 <- regspline(pm789$stwo,g=3,K=10)
regspline_g3_K30 <- regspline(pm789$stwo,g=3,K=30)

 # Anlegen eines Gitters, damit Funktion "glatt" gezeichnet werden kann
regspline_g3_K3_grid <- regspline(stwoGrid,g=3,K=3)
regspline_g3_K5_grid <- regspline(stwoGrid,g=3,K=5)
regspline_g3_K10_grid <- regspline(stwoGrid,g=3,K=10)
regspline_g3_K30_grid <- regspline(stwoGrid,g=3,K=30)

# Schaetzung der Modelle
m_g3_K3 <-lm(log10(PM10_Prin)~regspline_g3_K3-1,data=pm789)
m_g3_K5 <-lm(log10(PM10_Prin)~regspline_g3_K5-1,data=pm789)
m_g3_K10 <-lm(log10(PM10_Prin)~regspline_g3_K10-1,data=pm789)
m_g3_K30 <-lm(log10(PM10_Prin)~regspline_g3_K30-1,data=pm789)

# Vorhergesagte Werte
ydach_g3_K3 <- regspline_g3_K3_grid %*% m_g3_K3$coefficients
ydach_g3_K5 <- regspline_g3_K5_grid %*% m_g3_K5$coefficients
ydach_g3_K10 <- regspline_g3_K10_grid %*% m_g3_K10$coefficients
ydach_g3_K30 <- regspline_g3_K30_grid %*% m_g3_K30$coefficients
ydach_K <- list( ydach_g3_K3, ydach_g3_K5, ydach_g3_K10, ydach_g3_K30 )

par(mfrow=c(2,2))
for(i in 1:4){
  boxplot(log10(PM10_Prin) ~ stwo, data = pm789, las = 1, pch=".",
          "log10(PM10)", main=paste("K =",c(3,4,10,30)[i]), xlab="Wochenstunde",
          border="gray50")
  lines(stwoGrid, ydach_K[[i]], col=2, lwd=3)
  abline(v=(0:7)*24+1)
}

## ----Aufgabe2e-addon, fig.width=5.6, fig.height=3.6----------------------
library(mgcv)
gam.uwz <- gam(log10(PM10_Prin) ~ s(stwo, k=30), data = pm789)
layout(1)
plot(gam.uwz)
abline(v=(0:7)*24+0.5)

## ----Aufgabe2-f, fig.width=5.6, fig.height=3.6---------------------------
# Modell
regspline_g3_K30_2 <- regspline_g3_K30[,-1]
regspline_g3_K30_grid2 <- regspline_g3_K30_grid[,-1]
modTP <- lm( log10(PM10_Prin) ~ log(PM10_Joh) + regspline_g3_K30_2*UZ ,
             data=pm789)

# Praediktionen
ydach_keineUWZ <- cbind( 1, mean(log(pm789$PM10_Joh),na.rm=TRUE), 
                         regspline_g3_K30_grid2 ) %*% 
  modTP$coefficients[1:35]
ydach_UWZ <- cbind( 1, mean(log(pm789$PM10_Joh),na.rm=TRUE), 
                    regspline_g3_K30_grid2, 1, 
                    regspline_g3_K30_grid2) %*% 
  modTP$coefficients

# Plot
boxplot(log10(PM10_Prin) ~ stwo, data = pm789, las = 1, pch=".",
        "log10(PM10)", xlab="Wochenstunde",
        border="gray50")
lines(stwoGrid, ydach_keineUWZ, col=2, lwd=3)
lines(stwoGrid, ydach_UWZ, col=3, lwd=3)
abline(v=(0:7)*24+1)

mydach_keineUWZ <- mean(ydach_keineUWZ)
mydach_UWZ <- mean(ydach_UWZ)
10^(mean(ydach_UWZ)-mean(ydach_keineUWZ))

## ----Aufgabe2-g----------------------------------------------------------
plot(acf(residuals(modTP)))
library(car)
dwt(modTP)