# Demo script: covariance, correlation, standardisation, the bivariate normal
# distribution and simple linear regression, all on simulated data.
# Intended to be stepped through interactively; bare expressions print their
# value at the console.

library(mvtnorm) # rmvnorm() for the bivariate normal section

set.seed(1234)

# ---- Simulated data ----

# Two uncorrelated random variables
x1 <- rnorm(n = 100, mean = 12, sd = 4)
x2 <- rpois(100, lambda = 5)
# A third one that is correlated with x1 (x1 is used as its mean)
x3 <- rnorm(100, mean = x1, sd = 5)

hist(x1, breaks = 20)
hist(x2, breaks = 20)

plot(x1, x2, pch = 19, cex = 2)
plot(x1, x3, pch = 19, cex = 2)

# ---- Covariance and correlation ----

# Estimated covariance via mean(XY) - mean(X) * mean(Y).
# This is the 1/n version, so it differs from cov() (which divides by n - 1)
# by the factor (n - 1) / n.
mean(x1 * x2) - mean(x1) * mean(x2)
mean(x1 * x3) - mean(x1) * mean(x3)

# Correlation (lies between -1 and 1)
cor(x1, x2)
cor(x1, x3)
# Computed by hand: covariance divided by the product of standard deviations
cov(x1, x2) / (sd(x1) * sd(x2))
cov(x1, x3) / (sd(x1) * sd(x3))

# Covariance matrix of all three variables
cov(cbind(x1, x2, x3))
cov(x1, x3)
# The matrix is symmetric.
# Main diagonal: variances
var(x1)
var(x2)
var(x3)
# Off-diagonal: pairwise covariances
cov(x1, x2)
cov(x1, x3)
cov(x2, x3)

# Correlation matrix, derived from the covariance matrix
cov2cor(cov(cbind(x1, x2, x3)))

# Does adding a constant (+ 3) change anything?
cov(cbind(x1, x2, x3) + 3)
cov(cbind(x1, x2, x3))
# No.

# ---- Variance of a sum ----

var(x1 + x3)
# Sum of the variances alone omits the covariance term ...
var(x1) + var(x3)
# ... adding 2 * cov(x1, x3) reproduces var(x1 + x3)
var(x1) + var(x3) + 2 * cov(x1, x3)

# ---- Bigger sample and standardisation ----

x1 <- rnorm(1000, mean = 12, sd = 4)
hist(x1, breaks = 50, col = "lightgrey", freq = FALSE)
lines(density(x1), col = 2, lwd = 2)

# z-scores: subtract the mean, divide by the standard deviation
z1 <- (x1 - mean(x1)) / sd(x1)
hist(z1, breaks = 50, col = "lightgrey", freq = FALSE)
lines(density(z1), col = 2, lwd = 2)

mean(z1)
sd(z1)

# Raw and standardised sample side by side. The previous graphics settings
# are saved and restored afterwards so that later plots are not drawn into
# the two-panel layout.
op <- par(mfrow = c(1, 2))
hist(x1, breaks = 50, col = "lightgrey", freq = FALSE)
lines(density(x1), col = 2, lwd = 2)
hist(z1, breaks = 50, col = "lightgrey", freq = FALSE, xlim = c(-12.5, 12.5))
lines(density(z1), col = 2, lwd = 2)
par(op)

qqnorm(z1)
qqline(z1)

# ---- Bivariate normal (p = 2) ----

# x1 now holds 1000 values while x3 still holds the 100 values drawn from the
# old x1. cbind() would silently recycle x3 and the estimated covariance
# would be close to zero, so x3 is redrawn from the current x1 first.
x3 <- rnorm(length(x1), mean = x1, sd = 5)
Sigma <- cov(cbind(x1, x3))
X <- rmvnorm(n = 1000, mean = c(10, 10), sigma = Sigma)
plot(X)

# ---- Simple linear regression ----

x <- runif(100, min = 0, max = 3)
y <- 2 + 3 * x + rnorm(100)

plot(x, y, pch = 19, cex = 2)
# Linear model
lm1 <- lm(y ~ x)
# Estimates (intercept and slope)
coef(lm1)
# Slope computed from the covariance
cov(x, y) / var(x)
# Add the fitted regression line
abline(lm1, lwd = 2)
# Mark the point of means
points(mean(x), mean(y), pch = 19, col = 2, cex = 2)