## ----Aufgabe1-a---------------------------------------------------------- setwd("C:\\Exchange\\Work\\Teaching\\Limo\\LiMo15\\Uebung\\Blatt10") load("C:\\Exchange\\Work\\Teaching\\Limo\\LiMo15\\daten\\pm789.Rdata") # Funktion zur Berechnung von Basisfunktionswerten regspline <- function(kovariable, K, g){ # Bestimmung der Knoten knoten <- seq(min(kovariable), max(kovariable), length=K+2) # Anlegen der Design-Matrix X <- matrix(0, length(kovariable), K+g+1) # Polynome (in den ersten g+1 Spalten der Designmatrix) for(i in 0:g) { X[,i+1] <- kovariable^i } # abgeschnittene Potenzen (von der (g+2)-ten bis zur letzten Spalte der Designmatrix) for(i in 2:(K+1)){ X[,i+g] <- (kovariable > knoten[i]) * pmax((kovariable - knoten[i]), 0)^g } return(X) } stwoGrid <- seq(min(pm789$stwo), max(pm789$stwo), length=1000) regspline_g3_K30 <- regspline(pm789$stwo,g=3,K=30) regspline_g3_K30_2 <- regspline_g3_K30[,-1] library(nlme) # Nummerierung der Wochen obsPerWeek <- c(which(diff(pm789$stwo)<0)[1], diff(c(which(diff(pm789$stwo)<0),length(pm789$stwo)))) pm789$week <- rep(1:length(obsPerWeek),obsPerWeek) # Nummerierung der Zeitpunkte pm789$t <- 1:nrow(pm789) modTP <- gls( log10(PM10_Prin) ~ log10(PM10_Joh) + regspline_g3_K30_2*UZ , na.action="na.omit", data=pm789, method="ML") modTP_ar1 <- gls( log10(PM10_Prin) ~ log10(PM10_Joh) + regspline_g3_K30_2*UZ , na.action="na.omit", data=pm789, method="ML", correlation=corAR1(form=~t|week) ) save(modTP_ar1,file="modTP_ar1.Rdata") load("modTP_ar1.Rdata") summary(modTP_ar1)$tTable ## ----Aufgabe1-b, fig.width = 3, fig.height = 3--------------------------- # ACF-Plots plot(ACF(modTP,resType="normalized",form=~t|week)) plot(ACF(modTP_ar1,resType="normalized",form=~t|week)) ## ----Aufgabe1-c---------------------------------------------------------- regspline_g3_K30_2_red <- regspline_g3_K30_2[!is.na(pm789$UZ),] modTP_ar1_sub1 <- gls( log10(PM10_Prin) ~ log10(PM10_Joh) + regspline_g3_K30_2_red , na.action="na.omit", data=subset(pm789,!is.na(UZ)), correlation=corAR1(form=~t|week), method="ML" ) modTP_ar1_sub2 <- gls( log10(PM10_Prin) ~ log10(PM10_Joh) + regspline_g3_K30_2+UZ , na.action="na.omit", data=pm789, correlation=corAR1(form=~t|week), method="ML" ) save(modTP_ar1_sub1,file="modTP_ar1_sub1.Rdata") save(modTP_ar1_sub2,file="modTP_ar1_sub2.Rdata") load("modTP_ar1_sub1.Rdata") load("modTP_ar1_sub2.Rdata") anova(modTP_ar1, modTP_ar1_sub1) anova(modTP_ar1, modTP_ar1_sub2) ## ----Aufgabe2-a, fig.width = 5, fig.height = 5--------------------------- zuf <- read.table("http://www.stat.uni-muenchen.de/~abender/unterlagen/limo14/zufriedenheit.txt", header = TRUE) # Deskriptive Analyse boxplot(zuf$wellbeing) # Wert "6" sehr haeufig table(zuf$wellbeing)# nur quasimetrisch (nur diskrete Ausprägungen (in den Daten)) boxplot(zuf$wellbeing ~ zuf$west) # im Westen etwas hoehere Werte, aber auch staerkere Streuung boxplot(zuf$wellbeing ~ zuf$female) # etwas hoehere Werte fuer Frauen boxplot(zuf$wellbeing ~ zuf$year) # keine Tendenz zu erkennen plot(zuf$age, zuf$wellbeing) # wenig zu erkennen, da viele Beobachtungen uebereinander, deshalb mit Zufallsfehler: plot(zuf$age, zuf$wellbeing + rnorm(n=length(zuf$wellbeing), mean=0, sd=0.2)) # keine Tendenz zu erkennen plot(zuf$year, zuf$wellbeing + rnorm(n=length(zuf$wellbeing), mean=0, sd=0.2)) # keine Tendenz zu erkennen # individuelle Verlaeufe der ersten neun Personen library(ggplot2) ggplot(data = subset(zuf, subject < 10), aes(x = year, y = wellbeing)) + geom_point() + geom_line() + facet_wrap(~ subject, nrow = 3) + theme_bw() # meist nur geringe intrapersonelle Schwankungen ## alternativ: # library(lattice) # xyplot(wellbeing ~ year | subject, data=zuf, subset=subject<10, as.table = TRUE) ## ----Aufgabe2-b, fig.width = 5, fig.height = 5--------------------------- X.reg <- regspline(zuf$age, K = 3, g = 3) X.reg <- X.reg[, -1] colnames(X.reg) <- c("age", "age2", "age3", "age.tp1", "age.tp2", "age.tp3") ## Hinzufügen der anderen Variablen X.reg <- cbind(X.reg, zuf[, c("subject", "wellbeing", "west", "female", "year")]) ## lineares Modell limo <- lm(wellbeing ~ . - subject, data = X.reg) gls0 <- gls(wellbeing ~ . - subject, data = X.reg) summary(limo) plot(limo, which=1) # "Linien" im Residuenplot, weil Zielgroesse diskret plot(limo, which=2) ## geschaetzte Werte und Residuen fuer die erste neun Personen # fitted zuf$fitted.limo <- fitted(limo) zuf$residuals.limo <- residuals(limo) ggplot(subset(zuf, subject < 10), aes(x = year, y = wellbeing)) + geom_point() + geom_line(aes(y = fitted.limo)) + facet_wrap(~subject, nrow = 3) + theme_bw() # residuen ggplot(subset(zuf, subject < 10), aes(x = year, y = residuals.limo)) + geom_point() + geom_hline(yintercept = 0, col = 2, lty = 2) + facet_wrap(~subject, nrow = 3) + theme_bw() ##alternativ: # xyplot(fitted(limo) ~ year | subject, data=zuf, subset=subject<10) # xyplot(residuals(limo) ~ year | subject, data=zuf, subset=subject<10) # betrachte Autokorrelation der Residuen acf(residuals(limo)) # personenweise Autokorrelation der Residuen plot(ACF(gls0,form=~1|subject,data=zuf)) ## ----Aufgabe2-c, fig.width = 5, fig.height = 5--------------------------- # Verteilung der Residuen für jede Person boxplot(residuals(limo)~zuf$subj) # Residuen einzelner Personen streuen unterschiedlich stark # Schaetzung der Gewichte vars <- aggregate(residuals(limo), by=list(zuf$subj), FUN=var)$x # Berechnung der Diagonale von V obsPerSubj <- as.numeric(table(zuf$subj)) diagV <- rep(vars, obsPerSubj) names(vars) <- 1:length(vars) # varsRed <- vars[-1] # Gewichtetes LM weightedLm <- lm(wellbeing ~ . - subject, data=X.reg, weights=diagV^(-0.5) ) # Verallgemeinerte KQ-Methode gls1 <- gls(wellbeing ~ . -subject, data=X.reg, weights=varFixed(~diagV^0.5)) # REML-Schaetzung #gls2 <- gls(wellbeing ~ . -subject, data=X.reg, weights=varIdent(form=~1|subject)) #save(gls2, file="gls2.Rdata") load("gls2.Rdata") # Vergleich der Parameterschaetzungen ps <- cbind(coef(gls0), coef(weightedLm), coef(gls1), coef(gls2)) dotchart(t(ps[8:10,]), xlab="estimate") ageGrid <- seq(min(zuf$age), max(zuf$age), length=1000) X.reg_grid <- regspline(ageGrid, g=3, K=3) pred_gls0_om <- as.numeric(coef(gls0)%*%t(cbind(X.reg_grid,0,0,1990))) pred_weightedLm_om <- as.numeric(coef(weightedLm)%*%t(cbind(X.reg_grid,0,0,1990))) pred_gls1_om <- as.numeric(coef(gls1)%*%t(cbind(X.reg_grid,0,0,1990))) pred_gls2_om <- as.numeric(coef(gls2)%*%t(cbind(X.reg_grid,0,0,1990))) plot(ageGrid,pred_gls0_om, type="l", xlab="Alter", ylab="Geschätzte Lebenszufriedenheit", ylim=c(4,7)) lines(ageGrid,pred_weightedLm_om, col=2) lines(ageGrid,pred_gls1_om, col=3) lines(ageGrid,pred_gls2_om, col=4) legend("topright", lty=1, col=1:4, legend=c("gls0","weightedLm","gls1","gls2")) # Vergleich: empirisch ermittelte Gewichte vs. simultan geschaetze plot(vars^0.5,c(1,coef(gls2$model, unconstrained=FALSE))) ## ----Aufgabe2-d, fig.width = 5, fig.height = 5--------------------------- # Modell mit AR(1)-Prozess fuer die Residuen #gls3 <- gls(wellbeing ~ . - subject, weights=varIdent(form=~1|subject), # data=X.reg, correlation=corAR1(form=~1|subject)) #save(gls3,file="gls3.Rdata") load("gls3.Rdata") summary(gls3) pred_gls3_om <- as.numeric(coef(gls3)%*%t(cbind(X.reg_grid,0,0,1990))) ps <- cbind(coef(gls0), coef(weightedLm), coef(gls1), coef(gls2), coef(gls3)) dotchart(t(ps[8:10,]), xlab="estimate") plot(ageGrid,pred_gls0_om, type="l", xlab="Alter", ylab="Geschätzte Lebenszufriedenheit", ylim=c(4,7)) lines(ageGrid,pred_weightedLm_om, col=2) lines(ageGrid,pred_gls1_om, col=3) lines(ageGrid,pred_gls2_om, col=4) lines(ageGrid,pred_gls3_om, col=5) legend("topright", lty=1, col=1:5, legend=c("gls0","weightedLm","gls1","gls2","gls3")) # Schaetzer veraendern sich kaum, aber nun groessere p-Werte ## ----Aufgabe2-e, fig.width = 5, fig.height = 5--------------------------- # Personen-spezifische Mittelwerte der Residuen aus gls3 plot(aggregate(residuals(gls3,type="normalized"),by=list(zuf$subject),FUN=mean)$x) # ALM mit random intercept (Berechnung ohne subjekt-spezifische Residualvarianzen, # da Konvergenzprobleme) lme <- lme(wellbeing ~ age+age2+age3+age.tp1+age.tp2+age.tp3+west+female+year, data=X.reg, correlation=corAR1(form=~1|subject), random=~1|subject) summary(lme) # Alterseffekt pred_lme_om <- as.numeric(fixef(lme)%*%t(cbind(X.reg_grid,0,0,1990))) plot(ageGrid,pred_lme_om, type="l", xlab="Alter", ylab="Geschätzte Lebenszufriedenheit", ylim=c(4,7))