##############################################
# Analysis of Longitudinal Data (SoSe 2017)  #
#                                            #
# Solution for exercise session 1 Exercise 2 #
# Date: 04.05.2017                           #
##############################################

# Packages used below: reshape2 for melt(), ggplot2 for all plots.
library(reshape2)
library(ggplot2)

###############
# Exercise 2  #
###############

#### Exercise 2 a) ####

# Read the rats data (if necessary, adjust the path to 'rats.csv').
# check.names = FALSE keeps the time variable names exactly as they appear in
# the file instead of turning them into syntactic R names.
rats <- read.csv2("rats.csv", header = TRUE, check.names = FALSE)

# Transform GROUP and SUBJECT into factor variables.
rats$GROUP <- factor(rats$GROUP, labels = c("low", "high", "control"))
rats$SUBJECT <- factor(rats$SUBJECT)

#### Exercise 2 b) ####

# The data frame rats contains one row per subject (wide format).
# Use melt() from package reshape2 to transform the data frame such that all
# measurements are in separate rows (long format).
rats.long <- melt(
  rats,
  id.vars = c("SUBJECT", "GROUP"),
  variable.name = "TIME",
  value.name = "RESPONSE"
)

# Ensure TIME is a numeric variable: melt() returns it as a factor built from
# the column names, so convert via character to get the values, not the codes.
rats.long$TIME <- as.numeric(as.character(rats.long$TIME))
str(rats.long)

# Instead of using the R package reshape2, you may for example also use the
# function reshape(). To transform long into wide format, you might use the
# function dcast() in reshape2.

### Make plots:

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## i) Task: Explain data structure in one plot
ggplot(rats.long, aes(x = TIME, y = RESPONSE, col = SUBJECT)) +
  geom_line()

## Possible solution:
# Select example curves by e.g. sampling 2 out of each group.
# The groups are visited in level order, so the draws are reproducible for the
# given seed. The result is a character vector of SUBJECT labels.
set.seed(4332909)
sel <- unlist(lapply(levels(rats.long$GROUP), function(grp) {
  sample(levels(factor(rats.long$SUBJECT[rats.long$GROUP == grp])), 2)
}))

# All curves in grey, the sampled example curves highlighted by group colour.
# NOTE: ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines; `size` is
# kept here so the script also runs with older ggplot2 versions.
ggplot(rats.long, aes(x = TIME, y = RESPONSE, group = SUBJECT)) +
  geom_line(col = "darkgrey") +
  geom_point(col = "darkgrey") +
  geom_line(
    data = subset(rats.long, SUBJECT %in% sel),
    aes(col = GROUP),
    size = 2
  ) +
  theme_bw()
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## ii) Task: Discuss data structure with researcher who might have individual
##     information on subjects
ggplot(rats.long, aes(x = TIME, y = RESPONSE, col = GROUP)) +
  geom_line() +
  facet_wrap(~SUBJECT)

## Possible solution
# One plot per group; a common y-axis range (taken from the full data set)
# keeps the group-wise plots comparable.
response.range <- range(rats.long$RESPONSE, na.rm = TRUE)
for (grp in levels(rats.long$GROUP)) {
  # ggplot objects are not auto-printed inside a loop, so print explicitly.
  print(
    ggplot(
      subset(rats.long, GROUP == grp),
      aes(x = TIME, y = RESPONSE, col = SUBJECT, lty = SUBJECT)
    ) +
      geom_line() +
      facet_wrap(~GROUP, labeller = label_both) +
      ylim(response.range)
  )
}
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## iii) Task: Visualize group differences with group specific smooth means
ggplot(rats.long, aes(x = TIME, y = RESPONSE, col = GROUP, fill = GROUP)) +
  stat_smooth() +
  theme_bw()

## Possible solution
# Individual curves in grey, with the group-specific smooth on top, one panel
# per group.
ggplot(rats.long, aes(x = TIME, y = RESPONSE)) +
  geom_line(aes(group = SUBJECT), col = "darkgrey") +
  stat_smooth(aes(col = GROUP), size = 2, se = FALSE) +
  facet_wrap(~GROUP)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#### Exercise 2 c) ####

# Make Lasagna Plot

## Sort with respect to average response value
# Determine order: the measurement columns are all columns that are not id
# variables (the same split that melt() used above), so no column positions
# need to be hard-coded.
time.cols <- setdiff(names(rats), c("SUBJECT", "GROUP"))
means <- rowMeans(rats[, time.cols, drop = FALSE], na.rm = TRUE)
SUBJorder <- rats$SUBJECT[order(means)]

# Adjust order: make SUBJECT an ordered factor sorted by mean response.
rats.long$SUBJECT <- ordered(
  rats.long$SUBJECT,
  levels = as.character(SUBJorder)
)

ggplot(rats.long, aes(x = TIME, y = SUBJECT, fill = RESPONSE)) +
  geom_raster() +
  facet_wrap(~GROUP, nrow = 3, scales = "free_y", labeller = label_both)