# Simple linear regression walkthrough on the EPG data set.
# Covariance, correlation, slope/intercept, sums of squares and the
# significance tests are computed by hand and shown next to R's built-ins.

# Load the data ----
pfad <- "E:/b/ss07/statistik/regression"
d <- read.table(file.path(pfad, "epg.txt"), header = TRUE)
# attach() is kept only so that any later code relying on bare column names
# keeps working; this section refers to the columns of `d` explicitly.
attach(d)
names(d)

# Figure: COG x F2, COG x F1, SUM1278 x F1 ----
old_par <- par(mfrow = c(1, 3))
with(d, plot(F2, COG))
with(d, plot(F1, COG))
# Restrict the third panel to the rows whose LAB is one of these labels.
temp <- d$LAB %in% c("i", "I", "E", "a")
with(d, plot(F1[temp], SUM1278[temp]))
# Restore the previous layout so the regression plot below is not squeezed
# into a 1 x 3 grid.
par(old_par)

# Covariance ----
y <- d$F2
x <- d$COG
n <- length(y)
mx <- mean(x)
my <- mean(y)
dx <- x - mx
dy <- y - my
covxy <- sum(dx * dy) / (n - 1)
cov(x, y)

# Correlation ----
# Rescaling x changes the covariance but leaves the correlation unchanged.
xgross <- x * 1000
cov(x, y)
cov(xgross, y)
sx <- sd(x)
sy <- sd(y)
r <- cov(x, y) / (sx * sy)
cor(x, y)
cor(xgross, y)

# Regression ----
# Two equivalent expressions for the slope.
b <- r * sy / sx
b <- cov(x, y) / var(x)
# Intercept
k <- my - b * mx
# Fitted values
yhut <- b * x + k
plot(x, y)
abline(k, b)

# Residuals ----
error <- y - yhut
SSE <- sum(error^2)

# The lm() function ----
reg <- lm(y ~ x)
abline(reg)
yhut <- predict(reg)
residuals(reg)

# SSY, SSR, SSE ----
SSY <- sum((y - my)^2)
SSR <- sum((yhut - my)^2)

# R-squared ----
SSR / SSY
cor(x, y)^2

# Significance test ----
# Standard error of r, then the t statistic on n - 2 degrees of freedom.
rsb <- sqrt((1 - r^2) / (n - 2))
tstat <- r / rsb
# Two-sided p-value; abs() keeps it valid when the correlation is negative.
2 * pt(abs(tstat), n - 2, lower.tail = FALSE)
# Equivalent F test with 1 and n - 2 degrees of freedom.
fstat <- tstat^2
pf(fstat, 1, n - 2, lower.tail = FALSE)

# summary(), anova() ----
summary(reg)
MSR <- SSR / 1
MSE <- SSE / (n - 2)
anova(reg)