Daten & Packages laden

Bitte eingeben:

Zeit erlaubt: 90 Minuten. Bitte alle Fragen beantworten und Ihre Rmd-Datei regelmäßig speichern. Schicken Sie am Ende der Klausur Ihre Rmd Datei an Jonathan Harrington Email = .

Laden Sie die folgenden Packages und Data-Frames:

library(tidyverse)
library(broom)
library(gridExtra)
library(lmerTest)
library(emmeans)
library(MuMIn)
library(afex)
sigmoid = function(x, k = 0, m = 1) {
  exp(m * x + k) / (1 + exp(m * x + k))
}
urla = "https://www.phonetik.uni-muenchen.de/studium_lehre/"
urlb = "lehrmaterialien/R_speech_processing/Rdf"
url = paste0(urla, urlb)
v.df = read.table(file.path(url, "vdata.txt"), 
                          stringsAsFactors = T)
s.df = read.table(file.path(url, "sib2.df.txt"), 
                          stringsAsFactors = T) %>%
  mutate(G = substring(Vpn, 1, 1)) %>%
  filter(G == "M") %>%
  mutate(Vpn = factor(Vpn))
sg.df = read.table(file.path(url, "sagtp.df.txt"), 
                  stringsAsFactors = T)
p.df = read.table(file.path(url, "preasp.txt"), 
                  stringsAsFactors = T)
v.df %>%
  group_by(V, Cons) %>%
  summarise(m = mean(F2), s = sd(F2))
## `summarise()` has grouped output by 'V'. You can override using the `.groups`
## argument.
## # A tibble: 21 × 4
## # Groups:   V [7]
##    V     Cons      m     s
##    <fct> <fct> <dbl> <dbl>
##  1 %     K     1505. 212. 
##  2 %     P     1413. 186. 
##  3 %     T     1510. 207. 
##  4 A     K     1366. 149. 
##  5 A     P     1200.  89.1
##  6 A     T     1295. 114. 
##  7 E     K     1945. 229. 
##  8 E     P     1813. 274. 
##  9 E     T     1865. 250. 
## 10 I     K     2100. 258. 
## # ℹ 11 more rows
v.df %>%
  group_by(V, Cons) %>%
  summarise(m = mean(F2), s = sd(F2), r = m/s) %>%
  ggplot +
  aes(y = r) +
  geom_boxplot()
## `summarise()` has grouped output by 'V'. You can override using the `.groups`
## argument.

s.df %>%
  ggplot + 
  aes(y = M1, x = K, col = Stress) +
  geom_boxplot()

# K, Stress sind within im Bez. zur Vpn
s.df %>%
  mutate(Fac = interaction(K, Stress)) %>%
  select(Vpn, Fac) %>%
  table()
##      Fac
## Vpn   s.strong S.strong s.weak S.weak
##   M04       60       60     50     50
##   M08       60       60     50     50
##   M09       60       60     50     50
##   M16       59       60     50     50
##   M18       60       60     50     50
##   M19       60       60     50     50
# K, Stress sind between im Bez. zu Wort
s.df %>%
  mutate(Fac = interaction(K, Stress)) %>%
  select(Wort, Fac) %>%
  table()
##               Fac
## Wort           s.strong S.strong s.weak S.weak
##   Assault            60        0      0      0
##   Assembly           60        0      0      0
##   Disheveled          0       60      0      0
##   Fascinating         0        0     60      0
##   Information         0        0      0     60
##   Machine             0       60      0      0
##   Messy               0        0     60      0
##   Minnesota          60        0      0      0
##   Motorcycle          0        0     60      0
##   Overshadowed        0       60      0      0
##   Passionate          0        0      0     60
##   Perishable          0        0      0     60
##   Policy              0        0     60      0
##   Polishing           0        0      0     60
##   Possible            0        0     60      0
##   sane               59        0      0      0
##   seem               60        0      0      0
##   Shane               0       60      0      0
##   sheep               0       60      0      0
##   Show                0       60      0      0
##   Soak               60        0      0      0
##   Tissue              0        0      0     60
# Modell
s.df %>%
  lmer(M1 ~ K * Stress + (K+Stress|Vpn) + (1|Wort), .) %>%
anova()
## Type III Analysis of Variance Table with Satterthwaite's method
##          Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## K        34.553  34.553     1 16.284 235.410 4.176e-11 ***
## Stress    5.015   5.015     1 19.944  34.164 1.031e-05 ***
## K:Stress  3.143   3.143     1 18.001  21.413 0.0002092 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# alles signifikant. Post-hoc test:
s.df %>%
  lmer(M1 ~ K * Stress + (K+Stress|Vpn) + (1|Wort), .) %>%
emmeans(., ~ K * Stress) %>%
  pairs(., simple="each", combine=T)
##  Stress K contrast      estimate    SE   df t.ratio p.value
##  strong . s - S            1.900 0.122 20.8  15.532  <.0001
##  weak   . s - S            1.189 0.131 21.5   9.090  <.0001
##  .      s strong - weak    0.875 0.117 21.3   7.448  <.0001
##  .      S strong - weak    0.163 0.117 21.3   1.392  0.7127
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: bonferroni method for 4 tests
# Koeffiziente
km = sg.df %>%
  glm(Urteil ~ Stimulus, ., family=binomial) %>%
  coef()
km
## (Intercept)    Stimulus 
##   17.193420   -2.323632
# Abbildung
sg.df %>%
  group_by(Stimulus, Urteil) %>%
  summarise(n = n()) %>%
  mutate(prop = n/sum(n)) %>%
  filter(Urteil == levels(Urteil)[2]) %>%
  ggplot() +
  aes(y = prop, x = Stimulus) +
  geom_point() +
  stat_function(fun = sigmoid, 
                args=list(k = km[1], m = km[2])) +
  ylab("Proportion sagt")
## `summarise()` has grouped output by 'Stimulus'. You can override using the
## `.groups` argument.

# Test
sg.df %>%
  glm(Urteil ~ Stimulus, ., family=binomial) %>%
  anova(test='Chisq')
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Urteil
## 
## Terms added sequentially (first to last)
## 
## 
##          Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                       109    145.286              
## Stimulus  1   117.03       108     28.261 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Umkipppunkt
-km[1]/km[2]
## (Intercept) 
##    7.399372
# Proportion 'sagt' bei Stimulus 6.5
sigmoid(6.5, km[1], km[2])
##  Stimulus 
## 0.8899088
# Erwachsene: normale Sprechgeschwindigkeit
e_n = c(46, 50, 58, 57, 48, 44, 57, 50, 47, 51)
# Kinder: normale Sprechgeschwindigkeit
c_n = c(75, 43, 72, 82, 81, 62, 84, 79, 75, 68)

Die Vokaldauern derselben untebtonten Silbe produziert von denselben Sprechern aber zu einer schnellen Sprechgeschwindigkeit waren wie folgt:

# Erwachsene: schnelle Sprechgeschwindigkeit
e_s = c(14, 35, 32, 27, 35, 11, 35, 34, 29, 29)
# Kinder: schnelle Sprechgeschwindigkeit
c_s = c(67, 55, 73, 70, 68, 45, 31, 35, 59, 74)

Prüfen Sie durch eine Abbildung und statistischen Test, ob die Sprechergruppe und/oder Sprechgeschindigkeit einen Einfluss auf die Vokaldauern hatten.

v = c(e_n, c_n, e_s, c_s)
gr = factor(rep(rep(c("E", "C"), each = 10), 2))
geschw = factor(rep(c("L", "S"), each = 20))
vpn = factor(rep(paste0("S",  1:20), 2))

# Größere Dauer für Kinder als für Erwachsene
# Größere Dauer für langsam als für schnell
# Eventuell keine Interaktion?
d.df = data.frame(v, gr, geschw,  vpn)
d.df %>%
  ggplot + 
  aes(y = v, x = gr, col=geschw) +
  geom_boxplot()

# Sprechegruppe und Geschwindigkeit sig. 
d.df %>% 
  aov_4(v ~ gr * geschw + (geschw|vpn), .)
## Contrasts set to contr.sum for the following variables: gr
## Anova Table (Type 3 tests)
## 
## Response: v
##      Effect    df    MSE         F  ges p.value
## 1        gr 1, 18 131.85 49.13 *** .590   <.001
## 2    geschw 1, 18 118.13 29.13 *** .433   <.001
## 3 gr:geschw 1, 18 118.13      1.46 .037    .243
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
# Eventuell eine signifikante 
p.df %>%
  filter(city %in% c("napoli", "palermo", "parma")) %>%
  filter(vtype == "a") %>%
  ggplot +
  aes(y = vdur, x = clodur) +
  geom_point() +
  geom_smooth(method = "lm", se = F, color = "blue")
## `geom_smooth()` using formula = 'y ~ x'

# Ja, sig R^2 = 0.231, F[1, 46] = 13.82, p < 0.001
p.df %>%
  filter(city %in% c("napoli", "palermo", "parma")) %>%
  filter(vtype == "a") %>%
  lm(vdur ~ clodur, .) %>%
  summary()
## 
## Call:
## lm(formula = vdur ~ clodur, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.044193 -0.018318 -0.005661  0.009296  0.061852 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.13540    0.01482   9.134  6.7e-12 ***
## clodur      -0.34168    0.09191  -3.717 0.000545 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02702 on 46 degrees of freedom
## Multiple R-squared:  0.231,  Adjusted R-squared:  0.2143 
## F-statistic: 13.82 on 1 and 46 DF,  p-value: 0.0005449
p.df %>% 
  ggplot + 
  aes(y = vdur, x = clodur) +
  facet_wrap(~vtype) + 
  geom_point()

Ihre Antwort: