library(lattice) library(nlme) ################################################################################ # Exercise 2 # Mixed effects model with random intercept ################################################################################ # Erzeuge Daten fuer 100m-Lauf set.seed(123) n1 = 10 n2 = n1 n = n1 + n2 M = 3 b = rep(rnorm(n1 + n2, 0, 0.5), each = M) hist(b) sex = c(rep(0,n1), rep(1,n2)) beta = c(rep(10.6, n1 * M), rep(11.9, n2 * M)) eps = rnorm(n, 0, 0.2) times = beta + b + eps athlete = rep(1:n, each = M) plot(times ~ athlete, col = athlete, xlab = "LaeuferIn", ylab = "Zeit [sek]") abline(v=10.5) #sprint <- data.frame(athlete, sex=rep(sex, each=M), times) sprint <- data.frame(athlete=factor(athlete), sex=factor(rep(sex, each=M)), times) str(sprint) # have a look at the data plot(sprint) ###### (2a) # sprint0 <- groupedData(times~1|athlete, data=sprint, outer=~sex, # labels=list(x="", y="time"), units=list(x="", y="sec")) sprint0 <- groupedData(times ~ 1|athlete, data=sprint, outer=~sex) # outer: Faktoren, wie zB Geschlecht oder Behandlungsgruppe, # die invariant sind bezueglich der subjektspezifischen Beobachtungen # bei mehreren Faktoren zB: outer=sex*nation oder outer=nation*sex # inner: Faktoren, die sich innerhalb der Gruppierungsstruktur aendern # und als feste Effekte modelliert werden sollen; # hier zB Art der Diaet, die der Soprtler am Tag des Laufes bekommen hat # wenn sich die Diaet ueber die Tage und die Sportler aendert. # wenn man Faktoren, die sich innerhalb der Gruppierungsvariable aendern als # zufaellige Effekte modellieren moechte, sollte man sie als genestete # Gruppierungsstruktur angeben; # hier zB Info auf welcher Bahn der Lauf stattfindet, wenn es verschiedenen Bahnen gibt, # die von den Sportlern verwendet werden. ## verschiedene Mgl um ein groupedData-Objekt zu plotten ## ?plot.nmGroupedData plot(sprint0, outer=~sex) plot(sprint0, outer=TRUE) # use the specification of outer in the groupedData-object plot(sprint0, inner=~athlete) plot(sprint0, inner=~athlete, outer=~sex) plot.design(sprint0) ###### (2b) # fit the model with ML sprintML <- lme(times ~ sex, random=~1|athlete, data=sprint, method="ML") summary(sprintML) # fit the model with REML sprintREML <- lme(times ~ sex, random=~1|athlete, data=sprint, method="REML") summary(sprintREML) # compare variance components getVarCov(sprintML) # kleinere geschaetze Varianzen getVarCov(sprintREML) # compare fixed effects fixef(sprintML) fixef(sprintREML) # sind i.A. nicht gleich # compare random effects cbind("ML"=ranef(sprintML), "REML"=ranef(sprintREML)) ###### (2c) # compute fixed effects by hand # design matrices and response X <- cbind(rep(1, 60), as.numeric(levels(sprint$sex)[sprint$sex]) ) Z <- kronecker(diag(20), rep(1,3)) y <- sprint$times # Covariance matrices R <- diag(rep(sprintML$sigma^2, 60)) # covariance of residuals G <- diag(rep(getVarCov(sprintML), 20)) # covariance of random effects V <- Z %*% G %*% t(Z) + R # covariance of response # estimate beta hatBeta <- solve(t(X) %*% solve(V) %*% X) %*% t(X) %*% solve(V) %*% y # compare estimates hatBeta fixef(sprintML) ###### (2d) # Plot the original data with(sprint, plot(times ~ athlete, xlab = "LaeuferIn", ylab = "Zeit [sek]")) # add predictions of fixed effects with(sprint, points(predict(sprintREML, level=0)~athlete, pch=2, col=3, lwd=2)) # add predictions of fixed and random effects with(sprint, points(predict(sprintREML)~athlete, pch=3, col=4, lwd=2)) legend("bottomright", legend=c("observed", "fixef", "fixef+ranef"), pch=1:3, col=c(1,3,4)) ###### (2e) # extra: nation # -> nested design: athlete %in% nation nationAthlete <- sample(1:4, 20, replace=TRUE) nation <- rep(nationAthlete, each=M) sprint$nation <- factor(nation, levels=1:4, labels=c("A","B","C","D")) with(sprint, plot(times ~ athlete, col = NULL, border= nationAthlete, xlab = "LaeuferIn", ylab = "Zeit [sek]")) abline(v=10.5) with(sprint, plot(times ~ nation, xlab = "Nation", ylab = "Zeit [sek]", border= nationAthlete)) fm2sprint <- lme(times ~ sex, random=~1|nation/athlete, data=sprint) summary(fm2sprint) fixef(fm2sprint) ranef(fm2sprint) # ###### (2f) # ###### Compare fixed and random effects # # fit a model with fixed effects for persons # sprintFixed <- lm(times ~ -1 + athlete, data=sprint) # # # compare estimates with those of the model with fixed sex and random effect per person # # the results are very similar # cbind("Fixed"=predict(sprintFixed), "Fixed+Random"=predict(sprintREML)) # predict(sprintFixed)-predict(sprintREML) # # # Plot the original data # with(sprint, plot(times ~ athlete, xlab = "LaeuferIn", ylab = "Zeit [sek]")) # # add predictions of fixed effects # with(sprint, points(predict(sprintREML, level=0)~athlete, pch=2, col=3, lwd=2)) # # add predictions of fixed and random effects # with(sprint, points(predict(sprintREML)~athlete, pch=3, col=4, lwd=2)) # # add predictions of model with fixed effects for persons # with(sprint, points(predict(sprintFixed)~athlete, pch=4, col=5, lwd=2)) # legend("bottomright", legend=c("observed", "fixef", "fixef+ranef", "fixef+fixef"), pch=1:4, col=c(1,3,4,5)) ################################################################################ # data(package="nlme") # Two data examples of the book # Pinheiro, J. C.; Bates, D. M. (2000). Mixed-Effects Models in S and S-PLUS. # Springer, New York. ################################################################################ # Exercise 3 # Mixed-Effects Model for Replicated, Blocked Designs ################################################################################ ##### have a look at the data ?Machines # Details # Data on an experiment to compare three brands of machines used in an industrial # process are presented in Milliken and Johnson (p. 285, 1992). Six workers were # chosen randomly among the employees of a factory to operate each machine three times. # The response is an overall productivity score taking into account the number and quality # of components produced. # Machines data ist als groupedData-Object gespeichert class(Machines) ?groupedData # Plot the mean productivity scores for each worker over the 3 machines with(Machines, interaction.plot(Machine, Worker, score, las=1)) # with(Machines, boxplot(score ~ Worker)) # with(Machines, boxplot(score ~ Machine)) # with(Machines, boxplot(score ~ Machine + Worker)) # with(Machines, boxplot(score ~ Worker + Machine)) pdf("Machines.pdf") with(Machines, plot.default(score ~ as.numeric(levels(Worker)[Worker]), main="Machines", pch=as.numeric(Machine), xlab="Worker", ylab="score")) legend("bottomleft", title="Machine", legend=c("A","B","C"), pch=1:3) dev.off() with(Machines, boxplot(score ~ Worker + Machine, las=2)) ################ # # model with fixed effects for the type of machine # fm0Machine <- lme(score ~ Machine, data=Machines) # summary(fm0Machine) ###### (3a) # random intercept for each worker fm1Machine <- lme(score ~ - 1 + Machine, random=~1|Worker, data=Machines) summary(fm1Machine) # plot of residuals plot(fm1Machine) #plot(fm1Machine, cex=2, pch=16) fixef(fm1Machine) ranef(fm1Machine) ###### (3b) ## generate an interaction plot: ## averaging the scores for each worker on each machine type, ## plot these averages against the machine type, ## and join the points for each worker. with(Machines, interaction.plot(Machine, Worker, score, las=1)) ## if there were no interactions, the lines in the plots would be ## approximately parallel # ## first possibility for interaction # # add random interaction term for machine within each worker # fm2Machine <- lme(score ~ - 1 + Machine, random=~1|Worker/Machine, data=Machines) # summary(fm2Machine) # plot(fm2Machine) # fixef(fm2Machine) # ranef(fm2Machine) ## second possibility for interaction # add correlation between random interactions, # do not model random effects for Worker fm3Machine <- lme(score ~ - 1 + Machine, random=~Machine-1|Worker, data=Machines) summary(fm3Machine) plot(fm1Machine) fixef(fm3Machine) ranef(fm3Machine) # get covariance matrix of b_i (D <- getVarCov(fm3Machine)) cov2cor(D) # get correlation ### extra! # plot of orgininal data and predictions of model (a) and model (b) with(Machines, plot.default(score ~ Worker, pch=as.numeric(Machine))) legend("topleft", legend=c("Machine A","Machine B","Machine C"), pch=1:3) with(Machines, points(predict(fm1Machine) ~ Worker, pch=as.numeric(Machine), col=3, lwd=2)) with(Machines, points(predict(fm3Machine) ~ Worker, pch=as.numeric(Machine), col=4, lwd=2)) legend("bottomright", legend=c("mod (a)","mod (b)"), fil=3:4) # have a look at different parametrizations, at the example of worker 1 Machine1 <- Machines[Machines$Worker=="1",] model.matrix(score ~ Machine, data=Machine1) # with intercept model.matrix(score ~ - 1 + Machine, data=Machine1) # without intercept ###### (3c) # delete some observations to obtain unbalanced data dim(Machines) MachinesUnb <- Machines[sort(sample(1:54, 44)), ] fm1MachineUnb <- lme(score ~ - 1 + Machine, random=~1|Worker, data=MachinesUnb) summary(fm1MachineUnb) fm3MachineUnb <- lme(score ~ - 1 + Machine, random=~Machine-1|Worker, data=MachinesUnb) summary(fm3MachineUnb) # fixed effects in balanced design fixef(fm1Machine) fixef(fm3Machine) # fixed effects in unbalanced design fixef(fm1MachineUnb) fixef(fm3MachineUnb) # Bei balancierten Daten werden die festen Effekte immer auf die gleichen Werte # geschaetzt unabhaengig von der Spezifizierung der zufaelligen Effekte. # Im unbalancierten Design aendern sich die Schaetzer der festen Effekte, # je nachdem welche zufaelligen Effekte spezifiziert werden. ################################################################################ # Exercise 4 # Oxide: nested design ################################################################################ ##### have a look at the data # library(nlme) ?Oxide plot(Oxide, inner=~Lot) # same color per Lot ###### (4a) ## LMM mit zufaelligen Effekten fuer Scheiben (Wafer) in Chargen (Lot) fm1Oxide <- lme( Thickness ~ 1, random = ~ 1 | Lot/Wafer, data=Oxide ) summary(fm1Oxide) ## feste und zufaellige Effekte fixef(fm1Oxide) ranef(fm1Oxide) ###### (4b) plot(fm1Oxide) # oder von Hand plot(resid(fm1Oxide), col=Oxide$Lot, pch=as.numeric(Oxide$Wafer)) abline(h=0) ################################################################################ # Exercise 5 # Blutdruck ueber die Dosis ################################################################################ ### Simulate data ### set.seed(123) Ji <- rep(6,30) n <- sum(Ji) I <- length(Ji) ID <- rep(1:I, Ji) gender <- 1*(ID>15) dosis <- rep(c(10, 20, 40, 80, 120, 160), I) RI <- rnorm(I, 10) RS <- rnorm(I, sd = 0.09) eps <- rnorm(n, sd = 2) fun <- function(x){ -2*sqrt(x) - x/20 } SBD <- 150 + fun(dosis) - 2 * gender - dosis * gender/100 + RI[ID] + RS[ID] * dosis + eps blutdruck <- data.frame(ID=ID, gender = gender, dosis = dosis, SBD = SBD) ### Plot Data ### pdf("blutdruck.pdf") plot(0, xlim = c(10, 160), ylim = c(min(SBD), max(SBD)), xlab = "Dosis [mg]", ylab = "Systolischer Blutdruck [mmHg]", type = "n", cex.axis = 1.3, cex.lab = 1.3) for (i in 1:I){ data1 <- blutdruck[(ID == i),] points(data1$dosis, data1$SBD, pch = data1$gender) lines(data1$dosis, data1$SBD) ##, lty = data1$gender + 1 } dev.off() ###### (5a) ## Modell mit festen Effekten lm <- lm(SBD ~ gender + dosis + gender * dosis, data=blutdruck) summary(lm) ###### (5b) ## Modell mit random intercept lmm1 <- lme(SBD ~ gender + dosis + gender * dosis, random = ~ 1 |ID, data=blutdruck) summary(lmm1) ###### extra: ## compare estimates of fixed effects cbind(coef(lm), fixef(lmm1)) ## compare variances of estimated of fixed effects vcov(lm) vcov(lmm1) ###### (5c) ### Gemischtes Modell mit zufaelligen Intercepts und zufaelligen Steigunge in dosis lmm <- lme(SBD ~ gender + dosis + gender * dosis, random = ~ 1 + dosis|ID, data=blutdruck) lmm summary(lmm) # Kovarianzmatrix der Schaetzer der festen Effekte lmm$varFix vcov(lmm) ## ergibt das gleiche # Kovarianzmatrix der zufaelligen Effekte getVarCov(lmm)