# @Author: andreas.bender@statistik.lmu.de # @Date: 2014-02-27 20:59:40 # @Last Modified by: andreas.bender@statistik.lmu.de # @Last Modified time: 2015-04-22 16:37:50 ## ----setupBlatt1, echo=FALSE--------------------------------------------- library(ggplot2) theme_set(theme_bw()) library(gridExtra) ## ----Aufgabe1a, echo=TRUE,----------------------------------------------- auto <- read.table("http://www.statistik.lmu.de/~abender/daten/Datensatz1.txt", col.names = c("Weight", "MPG")) str(auto) summary(auto) gg.auto <- ggplot(auto, aes(x = Weight, y = MPG)) + geom_point() gg.auto lm1 <- lm(MPG ~ Weight, data = auto) summary(lm1) coef.lm1 <- round(coef(lm1), 3) ## ----meanVsBeta0, eval = FALSE------------------------------------------- mean(auto$MPG) coef.lm1[1] coef(update(lm1, .~. - Weight)) ## ----Aufgabe1b----------------------------------------------------------- coef.lm1 <- coefficients(summary(lm1)) coef.lm1["(Intercept)", "Estimate"]/coef.lm1["(Intercept)", "Std. Error"] ## ----Aufgabe1c----------------------------------------------------------- ## add-on # ?pt # allgemein q <- seq(-5,5, by = 0.1) df <- 30 plot(q, dt(q, df = 30), type = "l") q005.lower <- qt(p = 0.05, df = 30) q005.upper <- qt(p = 0.05, df = 30, lower.tail = FALSE) abline(v = c(q005.lower, q005.upper), lty = 2) q025.lower <- qt(p = 0.025, df = 30) q025.upper <- qt(p = 0.025, df = 30, lower.tail = FALSE) abline(v = c(q025.lower, q025.upper), lty = 3) pt(q = q025.lower, df = 30) coefs <- coefficients(summary(lm1)) q.beta0 <- coefs["(Intercept)", "Estimate"]/coefs["(Intercept)", "Std. Error"] 1 - 2*pt(q = q.beta0, df = 30) ## ----Aufgabe1d----------------------------------------------------------- auto$GPM <- 1/auto$MPG lm2 <- lm(GPM ~ Weight, data = auto) summary(lm2) ## ----Aufgabe1d-1, fig.width=10, fig.height=5----------------------------- gg.auto <- ggplot(auto) + xlab("Gewicht") gg.original <- gg.auto + aes(x = Weight, y = MPG) + geom_point() + ggtitle("Ursprüngliche Daten") + geom_smooth(method="lm", se=FALSE) gg.transformed <- gg.auto + aes(x=Weight, y = GPM) + geom_point() + ggtitle("Transformierte Daten") + geom_smooth(method="lm", se=FALSE) grid.draw( cbind( ggplotGrob(gg.original), ggplotGrob(gg.transformed), size = "last")) ## ----Aufgabe1f----------------------------------------------------------- auto$Gewicht <- auto$Weight/2.2046 auto$Verbrauch <- auto$GPM * 0.6214 * 100/0.22 lm3 <- lm(Verbrauch ~ Gewicht, data=auto) summary(lm3) ## ----Aufgabe2-a---------------------------------------------------------- auto <- read.table("http://www.statistik.lmu.de/~abender/daten/Datensatz1.txt", col.names = c("Weight", "MPG")) auto$GPM <- 1/auto$MPG auto$cWeight <- scale(auto$Weight, center = TRUE, scale = FALSE) library(ggplot2) library(reshape2) ggplot(melt(auto[, c("Weight", "cWeight")]), aes(x = variable, y = value)) + geom_violin() + geom_boxplot(width = 0.5) + geom_hline(y = 0, col = 2, lty = 3) + xlab("Variable") + ylab("Gewicht") ## Traditionell # boxplot(auto[, c("Weight", "cWeight")], las = 1, ) # abline(h = 0, lty = 3, col = 2, lwd = 2) lm.orig <- lm(GPM ~ Weight, data = auto) lm.c <- lm(GPM ~ cWeight, data = auto) summary(lm.c) ## ----Aufgabe2-b---------------------------------------------------------- coef(lm.orig)["(Intercept)"] + mean(auto[["Weight"]])* coef(lm.orig)["Weight"] ## ----Aufgabe2-c---------------------------------------------------------- ## Overall F-Test: SST <- sum((auto$GPM - mean(auto$GPM))^2) SSE <- deviance(lm.c) F <- ((SST - SSE)/(2 - 1))/(SSE/(df.residual(lm.c))) 1 - pf(F, 1, df.residual(lm.c)) ## ----Aufgabe3-d---------------------------------------------------------- auto$sWeight <- scale(auto$Weight, center = TRUE, scale = TRUE) auto$sGPM <- scale(auto$GPM) lm.s <- lm(sGPM ~ sWeight, data = auto) summary(lm.s) all.equal(cor(auto$GPM, auto$Weight), coef(lm.s)["sWeight"], check.attributes = FALSE) ## ----Aufgabe4-b---------------------------------------------------------- # x-Werte der Beispieldaten x <- c(2, 4, 6, 8) # y-Werte der Beispieldaten y <- c(2, 2, 4, 4) plot(x, y, xlim=c(0,9), ylim=c(0,9), main="Regressionsgeraden und Residuen") # "normale" Regression von y auf x lm1 <- lm(y ~ x) summary(lm1) abline(a=lm1$coefficients[1], b=lm1$coefficients[2], col=3) # alternativ: #abline(lm1, col=3) # Umkehrregression lm2 <- lm(x ~ y) summary(lm2) # Achsenabschnitt und Steigung der Geraden zur Umkehrregression -lm2$coefficients[1]/lm2$coefficients[2] 1/lm2$coefficients[2] abline(a=(-lm2$coefficients[1]/lm2$coefficients[2]), b=(1/lm2$coefficients[2]), col=2) legend(0, 9, legend=c("Regression von Y auf X", "Umkehrregression"), col=c(3,2), lty=1) # Residuen bei "normaler" Regression lines(c(2, 2), c(2, lm1$coefficients[1] + lm1$coefficients[2] * 2), col=3, lty=2) lines(c(4, 4), c(2, lm1$coefficients[1] + lm1$coefficients[2] * 4), col=3, lty=2) lines(c(6, 6), c(4, lm1$coefficients[1] + lm1$coefficients[2] * 6), col=3, lty=2) lines(c(8, 8), c(4, lm1$coefficients[1] + lm1$coefficients[2] * 8), col=3, lty=2) # Residuen bei Umkehrregression lines(c(4, 3), c(2, 2), col=2, lty=2) lines(c(2, 3), c(2, 2), col=2, lty=2) lines(c(8, 7), c(4, 4), col=2, lty=2) lines(c(6, 7), c(4, 4), col=2, lty=2)