# Load a data object.
# `pfad` is a placeholder: replace it with the directory where lvoc.txt was saved.
pfad <- "den Pfad eingeben, wo die Datei gespeichert wurde"
# header = TRUE is spelled out in full: `T` is an ordinary variable and can be
# reassigned, whereas TRUE is a reserved word.
lvoc <- read.table(file.path(pfad, "lvoc.txt"), header = TRUE)


# One group: test the observed proportion 5 / 20 against a null probability of 0.5
prop.test(x = 5, n = 20, p = 0.5)

# Two groups, two levels: compare the proportions 110 / 200 and 82 / 190
prop.test(x = c(110, 82), n = c(200, 190))


# Two groups: one with 2 levels,
# the other with more than 2 levels.
# Column totals of the table; colSums() works on the data frame directly,
# without the matrix coercion that apply() performs.
colSums(lvoc)
# NOTE(review): the counts below are typed in by hand; presumably they were
# read off lvoc and its column totals -- verify against lvoc.txt.
prop.test(x = c(58, 55, 62, 38), n = c(92, 104, 146, 97))

# with chi-squared
chisq.test(lvoc)




#############################################
#############################################
## How is chi-squared computed?
# `pfad` is a placeholder: replace it with the directory where lvoc.txt was saved.
pfad <- "Verzeichnis wo Sie lvoc.txt gespeichert haben"
# header = TRUE so that lvoc.txt is read the same way as at the top of this script.
lvoc <- read.table(file.path(pfad, "lvoc.txt"), header = TRUE)


# Observed counts (O) and the expected counts (E) stored in the chisq.test() result
r <- chisq.test(lvoc)
O <- lvoc
E <- r$expected
# Per-cell term (O - E)^2 / E
d <- (O - E)^2 / E
d


############## chi-squared test for a trend



# lost/lo:st data
# `pfad` is a placeholder: replace it with the directory where lost.txt was saved.
pfad <- "das Verzeichnis, wo Sie lost.txt gespeichert haben"
lost <- as.matrix(read.table(file.path(pfad, "lost.txt")))
# The row names are converted to numbers and shifted so that 1950 becomes 0
jahr <- as.numeric(rownames(lost)) - 1950

# Column 1 holds the count of /lo:st/
x <- lost[, 1]
# Total lo:st + lOst, separately per year
n <- rowSums(lost)

# Proportion of /lo:st/ (computed once from x and n and reused below)
p <- x / n
plot(jahr, p, type = "b")

prop.trend.test(x, n, jahr)

#####################
## logistic regression

# Binomial GLM with the count matrix `lost` as response and `jahr` as predictor
g <- glm(lost ~ jahr, family = binomial)


# Proportion of column 1 within each row
p <- lost[, 1] / rowSums(lost)
# log-odds
lodd <- log(p / (1 - p))
plot(jahr, lodd, type = "b")
# Overlay the regression line
abline(g, col = 2)
# Mark the model predictions (predict() default, i.e. the link scale) with "x"
text(jahr, predict(g), labels = "x", col = 3)

####
# From a proportion to log-odds ...
p <- 0.8
L <- log(p / (1 - p))
# ... and in the other direction: log-odds back to a proportion
exp(L) / (1 + exp(L))

###### Figure: year x proportions

# Slope (second coefficient) and intercept (first coefficient) of the fitted model
m <- coef(g)[2]
k <- coef(g)[1]
p <- lost[, 1] / rowSums(lost)
plot(jahr, p)
# Fitted curve on the proportion scale. plogis(z) is the inverse logit
# exp(z) / (1 + exp(z)), but does not overflow to NaN for large z.
curve(plogis(m * x + k), xlim = c(0, 60), add = TRUE, col = 2)
# Predictions on the link scale (predict() default), mapped back to proportions
vorher <- predict(g)
text(jahr, plogis(vorher), "x", col = 3)


####################
## Significance test

summary(g)
anova(g, test = "Chisq")


###########################
### Two independent variables

lost2 <- as.matrix(read.table(file.path(pfad, "lost2.txt")))
# NOTE(review): assumes lost2 holds the rows for group "m" first and then the
# rows for group "f", each in the same year order as `lost` -- verify against
# lost2.txt.
J <- rep(jahr, times = 2)
# Group labels as an explicit factor, sized from `jahr` instead of a fixed 6.
# Levels are sorted alphabetically ("f", "m"), the same order a character
# vector would get when converted to a factor.
G <- factor(rep(c("m", "f"), each = length(jahr)))

### Interaction plot
p <- lost2[, 1] / rowSums(lost2)
interaction.plot(J, G, p)


# G is already a factor, so glm() has no character vector to convert
# (the original version of this script noted a warning about G here).
mehrg <- glm(lost2 ~ J + G, family = binomial)

# Check whether we get the same result as
# anova(glm(lost ~ jahr, binomial), test = "Chisq")
#
g2 <- glm(lost2 ~ J, family = binomial)
anova(g2, test = "Chisq")

## Back to the multiple regression
mehrg
anova(mehrg, test = "Chisq")


########################################
### Interaction


g <- glm(lost2 ~ J + G + J:G, family = binomial)
# equivalent: g <- glm(lost2 ~ J * G, family = binomial)
anova(g, test = "Chisq")


library(MASS)
stepAIC(g)

###########################################
## Logistic regression with only 2 levels

gmf <- glm(lost2 ~ G, family = "binomial")
anova(gmf, test = "Chisq")

## The same thing: pool the counts of rows 1-6 and rows 7-12 into a 2-row table
m <- colSums(lost2[1:6, ])
f <- colSums(lost2[7:12, ])
mf <- rbind(m, f)
# Row labels "0"/"1" and column labels "high"/"low", set in one step
dimnames(mf) <- list(c("0", "1"), c("high", "low"))
mf

# Numeric 0/1 predictor, one value per row of mf
l.mf <- c(0, 1)
gmf2 <- glm(mf ~ l.mf, family = "binomial")
anova(gmf2, test = "Chisq")


################# and a similar result
## with a direct chi-squared test

chisq.test(mf)

####################################
### Three groups, two levels each

# header = TRUE spelled out in full (`T` is a reassignable variable)
lost3 <- as.matrix(read.table(file.path(pfad, "alter.txt"), header = TRUE))
# 0/1 codes for the two predictors.
# NOTE(review): assumes lost3 has four rows in exactly this A/G order -- verify
# against alter.txt.
A <- c(0, 0, 1, 1)
G <- c(0, 1, 0, 1)
# Proportion of column 1 within each row
prop <- lost3[, 1] / rowSums(lost3)
interaction.plot(A, G, prop)

# Main effects of A and G plus their interaction
g <- glm(lost3 ~ A * G, family = binomial)
anova(g, test = "Chisq")