## ----Aufgabe1-a, fig.show="hide"----------------------------------------- library(faraway) teengamb$sexF <- factor(teengamb$sex, levels = c(0,1), labels = c("m", "w")) lm.gamb <- lm(gamble ~ income + sexF + status + verbal, data = teengamb) ## library zum plotten von Konfidenzellipsoiden laden library(car) # ?confidenceEllipse confidenceEllipse(lm.gamb, which.coef = c("status", "verbal"), las = 1) text(x = coef(lm.gamb)[c("status")] + 0.1, y = coef(lm.gamb)[c("verbal")] + 0.5, labels = expression(paste("(",hat(beta)[3], ", ", hat(beta)[4], ")")), pch = 19, cex = 1.2) ## ----Aufgabe1-b, fig.width=5.6, fig.height=5.6--------------------------- confidenceEllipse(lm.gamb, which.coef = c("status", "verbal"), las = 1) text(x = coef(lm.gamb)[c("status")] + 0.1, y = coef(lm.gamb)[c("verbal")] + 0.5, labels = expression(paste("(",hat(beta)[3], ", ", hat(beta)[4], ")")), pch = 19, cex = 1.2) ## Ursprung points(0,0, pch = 19, col = 1, cex = 1.2) text(x = 0+0.05,y = 0-0.35, labels = "(0,0)") ## normale KI abline(v = confint(lm.gamb)["status", ], lty = 2) abline(h = confint(lm.gamb)["verbal", ], lty = 2) ## Bonferroni KI abline(v=confint(lm.gamb,level=1-0.05/2)["status",],lty=2, col = 4) abline(h=confint(lm.gamb,level=1-0.05/2)["verbal",],lty=2, col = 4) ## ----Aufgabe1-c---------------------------------------------------------- # Naive p-Werte (naiveP <- summary(lm.gamb)$coefficients[c("status","verbal"),"Pr(>|t|)"]) # Bonferroni-adjustierte p-Werte p.adjust(naiveP, method = "bonferroni") # Adjustierung der p-Werte nach dem Maximum-Prinzip library(multcomp) glhtObj <- glht(lm.gamb,linfct=c("status = 0", "verbal = 0")) summary(glhtObj)$test$pvalues ## ----Aufgabe2-a---------------------------------------------------------- load("C:\\Exchange\\Work\\Teaching\\Limo\\LiMo15\\daten\\pm789.Rdata") mod <- lm( log10(PM10_Prin) ~ log10(PM10_Joh) + UZ, data=pm789 ) summary(mod) ## ----Aufgabe2-b, fig.width=5.6, fig.height=5.6--------------------------- library(effects) modWd <- lm( log10(PM10_Prin) ~ log10(PM10_Joh) + log10(PM10_Joh):cosWd + UZ , data=pm789) summary(modWd) plot(effect("log10(PM10_Joh)*cosWd",modWd)) ## ----Aufgabe2-c, fig.width=7.6, fig.height=5.6--------------------------- plot(pm789$stwo,log10(pm789$PM10_Prin)) boxplot(log(PM10_Prin) ~ stwo, data = pm789, las = 1, pch=".") abline(v=(0:7)*24+1) ## ----Aufgabe2-d, fig.width=7.6, fig.height=10.6-------------------------- # Polynomiale Schaetzungen modP1 <- lm(log10(PM10_Prin) ~ stwo, data=pm789) modP2 <- lm(log10(PM10_Prin) ~ poly(stwo, 2, raw=TRUE), data=pm789) # statt lm(log10(PM10_Prin) ~ stwo + I(stwo^2), data=pm789) modP3 <- lm(log10(PM10_Prin) ~ poly(stwo, 3, raw=TRUE), data=pm789) modP4 <- lm(log10(PM10_Prin) ~ poly(stwo, 4, raw=TRUE), data=pm789) modP5 <- lm(log10(PM10_Prin) ~ poly(stwo, 5, raw=TRUE), data=pm789) modP6 <- lm(log10(PM10_Prin) ~ poly(stwo, 6, raw=TRUE), data=pm789) modP7 <- lm(log10(PM10_Prin) ~ poly(stwo, 7, raw=TRUE), data=pm789) modP8 <- lm(log10(PM10_Prin) ~ poly(stwo, 8, raw=TRUE), data=pm789) modP <- list(modP1,modP2,modP3,modP4,modP5,modP6,modP7,modP8) par(mfrow=c(4,2)) for(i in 1:8){ boxplot(log10(PM10_Prin) ~ stwo, data = pm789, las = 1, pch=".", "log10(PM10)", main=paste("g =",i), xlab="Wochenstunde", border="gray50") lines(pm789$stwo[!is.na(pm789$PM10_Prin)], fitted(modP[[i]]), col=2, lwd=2) abline(v=(0:7)*24+1) } # hierbei werden nur die geschaetzten Werte an den Datenpunkten eingezeichnet und # miteinander verbunden, aber nicht der Verlauf der geschaetzten Funktion dargestellt # Definition eines Gitters zur Darstellung des Verlaufs der geschaetzten Funktion zeitgitter <- seq(min(pm789$stwo), max(pm789$stwo), length=1000) # dazugehoerige Polynome zeit1 <- cbind(rep(1, times=1000), poly(zeitgitter, 1, raw=TRUE)) zeit2 <- cbind(rep(1, times=1000), poly(zeitgitter, 2, raw=TRUE)) zeit3 <- cbind(rep(1, times=1000), poly(zeitgitter, 3, raw=TRUE)) zeit4 <- cbind(rep(1, times=1000), poly(zeitgitter, 4, raw=TRUE)) zeit5 <- cbind(rep(1, times=1000), poly(zeitgitter, 5, raw=TRUE)) zeit6 <- cbind(rep(1, times=1000), poly(zeitgitter, 6, raw=TRUE)) zeit7 <- cbind(rep(1, times=1000), poly(zeitgitter, 7, raw=TRUE)) zeit8 <- cbind(rep(1, times=1000), poly(zeitgitter, 8, raw=TRUE)) zeit <- list(zeit1,zeit2,zeit3,zeit4,zeit5,zeit6,zeit7,zeit8) for(i in 1:8){ boxplot(log10(PM10_Prin) ~ stwo, data = pm789, las = 1, pch=".", "log10(PM10)", main=paste("g =",i), xlab="Wochenstunde", border="gray50") lines(zeitgitter, (zeit[[i]] %*% modP[[i]]$coefficients), col=2, lwd=2) abline(v=(0:7)*24+1) } ## ----Aufgabe2-e, fig.width=5.3, fig.height=5.3--------------------------- regspline <- function(kovariable, K, g){ # Bestimmung der Knoten knoten <- seq(min(kovariable), max(kovariable), length=K+2) # Anlegen der Design-Matrix X <- matrix(0, length(kovariable), K+g+1) # Polynome (in den ersten g+1 Spalten der Designmatrix) for(i in 0:g) { X[,i+1] <- kovariable^i } # abgeschnittene Potenzen (von der (g+2)-ten bis zur letzten Spalte der Designmatrix) for(i in 2:(K+1)){ X[,i+g] <- (kovariable > knoten[i]) * pmax((kovariable - knoten[i]), 0)^g } return(X) } # Berechnung der modifizierten Variablen f?r verschiedene Polynom-Grade regspline_g1_K3 <- regspline(pm789$stwo,g=1,K=3) regspline_g2_K3 <- regspline(pm789$stwo,g=2,K=3) regspline_g3_K3 <- regspline(pm789$stwo,g=3,K=3) regspline_g4_K3 <- regspline(pm789$stwo,g=4,K=3) regspline_g5_K3 <- regspline(pm789$stwo,g=5,K=3) regspline_g6_K3 <- regspline(pm789$stwo,g=6,K=3) regspline_g7_K3 <- regspline(pm789$stwo,g=7,K=3) regspline_g8_K3 <- regspline(pm789$stwo,g=8,K=3) # Anlegen eines Gitters, damit Funktion "glatt" gezeichnet werden kann stwoGrid <- seq(min(pm789$stwo), max(pm789$stwo), length=1000) regspline_g1_K3_grid <- regspline(stwoGrid,g=1,K=3) regspline_g2_K3_grid <- regspline(stwoGrid,g=2,K=3) regspline_g3_K3_grid <- regspline(stwoGrid,g=3,K=3) regspline_g4_K3_grid <- regspline(stwoGrid,g=4,K=3) regspline_g5_K3_grid <- regspline(stwoGrid,g=5,K=3) regspline_g6_K3_grid <- regspline(stwoGrid,g=6,K=3) regspline_g7_K3_grid <- regspline(stwoGrid,g=7,K=3) regspline_g8_K3_grid <- regspline(stwoGrid,g=8,K=3) # Schaetzung der Modelle m_g1_K3 <-lm(log10(PM10_Prin)~regspline_g1_K3-1,data=pm789) m_g2_K3 <-lm(log10(PM10_Prin)~regspline_g2_K3-1,data=pm789) m_g3_K3 <-lm(log10(PM10_Prin)~regspline_g3_K3-1,data=pm789) m_g4_K3 <-lm(log10(PM10_Prin)~regspline_g4_K3-1,data=pm789) m_g5_K3 <-lm(log10(PM10_Prin)~regspline_g5_K3-1,data=pm789) m_g6_K3 <-lm(log10(PM10_Prin)~regspline_g6_K3-1,data=pm789) m_g7_K3 <-lm(log10(PM10_Prin)~regspline_g7_K3-1,data=pm789) m_g8_K3 <-lm(log10(PM10_Prin)~regspline_g8_K3-1,data=pm789) dim(model.matrix(m_g1_K3)) dim(pm789) # Vorhergesagte Werte ydach_g1_K3 <- regspline_g1_K3_grid %*% m_g1_K3$coefficients ydach_g2_K3 <- regspline_g2_K3_grid %*% m_g2_K3$coefficients ydach_g3_K3 <- regspline_g3_K3_grid %*% m_g3_K3$coefficients ydach_g4_K3 <- regspline_g4_K3_grid %*% m_g4_K3$coefficients ydach_g5_K3 <- regspline_g5_K3_grid %*% m_g5_K3$coefficients ydach_g6_K3 <- regspline_g6_K3_grid %*% m_g6_K3$coefficients ydach_g7_K3 <- regspline_g7_K3_grid %*% m_g7_K3$coefficients ydach_g8_K3 <- regspline_g8_K3_grid %*% m_g8_K3$coefficients ydach_g <- list( ydach_g1_K3, ydach_g2_K3, ydach_g3_K3, ydach_g4_K3, ydach_g5_K3, ydach_g6_K3, ydach_g7_K3, ydach_g8_K3 ) # Interpretation der Koeffizienten layout(1) summary(m_g3_K3) boxplot(log10(PM10_Prin) ~ stwo, data = pm789, las = 1, pch=".", "log10(PM10)", main="g = 3", xlab="Wochenstunde", border="gray50") lines(stwoGrid, regspline_g3_K3_grid[,1] * m_g3_K3$coefficients[1], col=2) lines(stwoGrid, regspline_g3_K3_grid[,1:2] %*% m_g3_K3$coefficients[1:2], col=2) lines(stwoGrid, regspline_g3_K3_grid[,1:3] %*% m_g3_K3$coefficients[1:3], col=2) abline(v=seq(min(pm789$stwo), max(pm789$stwo), length=5)+0.5) lines(stwoGrid, regspline_g3_K3_grid[,1:4] %*% m_g3_K3$coefficients[1:4], col=2) lines(stwoGrid, regspline_g3_K3_grid[,1:5] %*% m_g3_K3$coefficients[1:5], col=2) lines(stwoGrid, regspline_g3_K3_grid[,1:6] %*% m_g3_K3$coefficients[1:6], col=2) lines(stwoGrid, regspline_g3_K3_grid[,1:7] %*% m_g3_K3$coefficients[1:7], col=2) # beta: Koeffizenten fuer Potenzen # gamma: Koeffizienten fuer trunkierte Potenzen 3. Grades ## ----Aufgabe2-e2, fig.width=7.6, fig.height=10.6------------------------- par(mfrow=c(4,2)) for(i in 1:8){ boxplot(log10(PM10_Prin) ~ stwo, data = pm789, las = 1, pch=".", "log10(PM10)", main=paste("g =", i), xlab="Wochenstunde", border="gray50") lines(stwoGrid, ydach_g[[i]], col=2,lwd=3) abline(v=(0:7)*24+1) } ## ----Aufgabe2-e3, fig.width=7.6, fig.height=5.3-------------------------- # Berechnung der modifizierten Variablen fuer verschiedene Knotenzahlen regspline_g3_K3 <- regspline(pm789$stwo,g=3,K=3) regspline_g3_K5 <- regspline(pm789$stwo,g=3,K=5) regspline_g3_K10 <- regspline(pm789$stwo,g=3,K=10) regspline_g3_K30 <- regspline(pm789$stwo,g=3,K=30) # Anlegen eines Gitters, damit Funktion "glatt" gezeichnet werden kann regspline_g3_K3_grid <- regspline(stwoGrid,g=3,K=3) regspline_g3_K5_grid <- regspline(stwoGrid,g=3,K=5) regspline_g3_K10_grid <- regspline(stwoGrid,g=3,K=10) regspline_g3_K30_grid <- regspline(stwoGrid,g=3,K=30) # Schaetzung der Modelle m_g3_K3 <-lm(log10(PM10_Prin)~regspline_g3_K3-1,data=pm789) m_g3_K5 <-lm(log10(PM10_Prin)~regspline_g3_K5-1,data=pm789) m_g3_K10 <-lm(log10(PM10_Prin)~regspline_g3_K10-1,data=pm789) m_g3_K30 <-lm(log10(PM10_Prin)~regspline_g3_K30-1,data=pm789) # Vorhergesagte Werte ydach_g3_K3 <- regspline_g3_K3_grid %*% m_g3_K3$coefficients ydach_g3_K5 <- regspline_g3_K5_grid %*% m_g3_K5$coefficients ydach_g3_K10 <- regspline_g3_K10_grid %*% m_g3_K10$coefficients ydach_g3_K30 <- regspline_g3_K30_grid %*% m_g3_K30$coefficients ydach_K <- list( ydach_g3_K3, ydach_g3_K5, ydach_g3_K10, ydach_g3_K30 ) par(mfrow=c(2,2)) for(i in 1:4){ boxplot(log10(PM10_Prin) ~ stwo, data = pm789, las = 1, pch=".", "log10(PM10)", main=paste("K =",c(3,4,10,30)[i]), xlab="Wochenstunde", border="gray50") lines(stwoGrid, ydach_K[[i]], col=2, lwd=3) abline(v=(0:7)*24+1) } ## ----Aufgabe2e-addon, fig.width=5.6, fig.height=3.6---------------------- library(mgcv) gam.uwz <- gam(log10(PM10_Prin) ~ s(stwo, k=30), data = pm789) layout(1) plot(gam.uwz) abline(v=(0:7)*24+0.5) ## ----Aufgabe2-f, fig.width=5.6, fig.height=3.6--------------------------- # Modell regspline_g3_K30_2 <- regspline_g3_K30[,-1] regspline_g3_K30_grid2 <- regspline_g3_K30_grid[,-1] modTP <- lm( log10(PM10_Prin) ~ log(PM10_Joh) + regspline_g3_K30_2*UZ , data=pm789) # Praediktionen ydach_keineUWZ <- cbind( 1, mean(log(pm789$PM10_Joh),na.rm=TRUE), regspline_g3_K30_grid2 ) %*% modTP$coefficients[1:35] ydach_UWZ <- cbind( 1, mean(log(pm789$PM10_Joh),na.rm=TRUE), regspline_g3_K30_grid2, 1, regspline_g3_K30_grid2) %*% modTP$coefficients # Plot boxplot(log10(PM10_Prin) ~ stwo, data = pm789, las = 1, pch=".", "log10(PM10)", xlab="Wochenstunde", border="gray50") lines(stwoGrid, ydach_keineUWZ, col=2, lwd=3) lines(stwoGrid, ydach_UWZ, col=3, lwd=3) abline(v=(0:7)*24+1) mydach_keineUWZ <- mean(ydach_keineUWZ) mydach_UWZ <- mean(ydach_UWZ) 10^(mean(ydach_UWZ)-mean(ydach_keineUWZ)) ## ----Aufgabe2-g---------------------------------------------------------- plot(acf(residuals(modTP))) library(car) dwt(modTP)