# --------------------------------------------------------------------------
# -------- Übung zur Vorlesung Generalisierte Regressionsmodelle -----------
# -------- R Code / Blatt 3                                      -----------
# --------------------------------------------------------------------------



# -------- Aufgabe 2 -----------------------------------------------------

# c)
# keine individuellen Beobachtungen gegeben -> gruppierte Daten, gruppiertes Logit-Modell
# n willkürlich festlegen
n <- 10 # estimates unabhängig von konkretem n, nicht aber se -> z -> p
probs <- c(0.9, 0.7, 0.6, 0.4)
df.unemployment <- data.frame(yes = probs * n, no = n * (1-probs), 
        xG = c(1,1,0,0), xA = c(1,0,1,0))
glm.unemployment <- glm(cbind(yes, no)~ xG*xA, data = df.unemployment, 
        family = binomial())
summary(glm.unemployment)# deviance residuen = 0, da perfekte anpassung moeglich
# vgl. fitted(glm.unemployment)


# e)
# Loglikelihood des unrestringierten Modells:
logLik(glm.unemployment)

# Modell unter H0:
glm.unemployment.h0 <- glm(cbind(yes, no) ~ xG + xA, data = df.unemployment, 
        family=binomial)
logLik(glm.unemployment.h0)

# LQ-Statistik:
(lambda <- -2*(logLik(glm.unemployment.h0) - logLik(glm.unemployment)))

# Testentscheidung zu alpha = 0.05:
(quant <- qchisq(0.95, 1))
lambda > quant

# p-Wert:
1 - pchisq(lambda, 1) # 0.72

# Problem: der wahre Stichprobenumfang und die Häufigkeit der einzelnen xG-xA-
# Kombinationen sind hier nicht bekannt. Was passiert z.B. bei doppelt sovielen
# aber identischen Beobachtungen?
n.2 <- 20 
df.unemployment.2 <- data.frame(yes = probs * n.2, no = n.2 * (1-probs), 
        xG = c(1,1,0,0), xA = c(1,0,1,0))

# Modelle
glm.unemployment.2 <- glm(cbind(yes, no)~ xG*xA, data = df.unemployment.2, 
        family = binomial())
glm.unemployment.2.h0 <- glm(cbind(yes, no) ~ xG + xA, data = df.unemployment.2, 
        family=binomial)

# LQ-Statistik:
(lambda2 <- -2*(logLik(glm.unemployment.2.h0) - logLik(glm.unemployment.2)))

# p-Wert:
1 - pchisq(lambda2, 1)

# Vergleich:
c(lambda, lambda2)
## => wie bereits bekannt gilt auch hier: Signifikanz hängt vom Stichproben-
##    umfang ab!

# elegantere Berechnung:
anova(glm.unemployment, glm.unemployment.h0, test = "LRT")
anova(glm.unemployment.2, glm.unemployment.2.h0, test = "LRT")