Daten & Packages laden

Laden Sie die folgenden Packages und Data-Frames:

library(tidyverse)
library(gridExtra)
# Build the base URL in two pieces (kept short for line length) and read the
# two data frames used below. Character columns are converted to factors.
urla <- "https://www.phonetik.uni-muenchen.de/studium_lehre/"
urlb <- "lehrmaterialien/R_speech_processing/Rdf"
url <- paste0(urla, urlb)
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
preasp <- read.table(file.path(url, "preasp.txt"), stringsAsFactors = TRUE)
vdata <- read.table(file.path(url, "vdata.txt"), stringsAsFactors = TRUE)

Normalverteilung

# Simulate k sample means of N draws (with replacement) from the values 1..8
# and set up the parameters of the matching normal curve.
mu <- mean(1:8)
# Number of draws per sample; defined once so SE and the loop stay in sync.
N <- 5
# sd() divides by n - 1; the factor sqrt(7/8) rescales it to the population
# standard deviation of 1:8, which is then divided by sqrt(N).
SE <- sd(1:8) * sqrt(7 / 8) / sqrt(N)
k <- 200
# Preallocate instead of growing the vector with c() on every iteration.
results <- numeric(k)
for (j in seq_len(k)) {
  wurf <- sample(1:8, N, replace = TRUE)
  results[j] <- mean(wurf)
}

# Histogram of the simulated sample means on a density scale, overlaid with
# the normal density that has mean `mu` and standard deviation `SE`.
results.df <- data.frame(mean_scores = results)
results.df %>%
  ggplot() +
  aes(x = mean_scores) +
  # after_stat(density) replaces the `..density..` notation, which has been
  # deprecated since ggplot2 3.4.0.
  geom_histogram(aes(y = after_stat(density)),
                 color = "white",
                 binwidth = .25) +
  geom_function(fun = dnorm,
                args = list(mean = mu, sd = SE),
                col = "blue",
                lwd = 1.5) +
  xlim(1, 8) +
  ylab("Probability density") +
  xlab("Sample mean")
## Warning: Removed 2 rows containing missing values (`geom_bar()`).

# Lower-tail probability at 2.5 under the normal curve with mean `mu`, sd `SE`
pnorm(2.5, mean = mu, sd = SE)
## [1] 0.02548097
# Probability between 5 and 7 under the same curve (upper minus lower CDF)
diff(pnorm(c(5, 7), mean = mu, sd = SE))
## [1] 0.3054439
# 2.5% and 97.5% quantiles of a normal distribution with mean 100 and sd 15
qnorm(0.025, 100, 15)
## [1] 70.60054
qnorm(0.975, 100, 15)
## [1] 129.3995
# Probability of a value below 80
pnorm(80, 100, 15)
## [1] 0.09121122
# Probability of a value between 110 and 125. Both terms must use mean 100;
# with a mean of 110 the lower term is trivially 0.5.
pnorm(125, 100, 15) - pnorm(110, 100, 15)
## [1] 0.2047022
# 4 or 5
50 * pnorm(80, 100, 15)
## [1] 4.560561

Differenz-Abbildungen

Zur Orientierung siehe bitte auch die Lösung zu Q10 hier sowie die zweite Boxplot-Abbildung hier.

# Mean `vc` for every combination of `Pre` and `city`.
# `.groups = "drop"` returns an ungrouped tibble directly, so no trailing
# ungroup() is needed and summarise() does not print its grouping message.
preasp %>%
  group_by(Pre, city) %>%
  summarise(m = mean(vc), .groups = "drop")
## # A tibble: 30 × 3
##    Pre     city          m
##    <fct>   <fct>     <dbl>
##  1 -preasp bari      0.359
##  2 -preasp bergamo   0.344
##  3 -preasp cagliari  0.341
##  4 -preasp Catanzaro 0.397
##  5 -preasp firenze   0.355
##  6 -preasp genova    0.329
##  7 -preasp lecce     0.364
##  8 -preasp milano    0.385
##  9 -preasp napoli    0.346
## 10 -preasp palermo   0.333
## # ℹ 20 more rows
# City-wise difference between the two `Pre` conditions: spread the means into
# one column per `Pre` level, then subtract the `+preasp` column from the
# `-preasp` column.
preasp %>%
  group_by(Pre, city) %>%
  summarise(m = mean(vc), .groups = "drop") %>%
  pivot_wider(names_from = Pre,
              values_from = m) %>%
  mutate(d = `-preasp` - `+preasp`)
## # A tibble: 15 × 4
##    city      `-preasp` `+preasp`       d
##    <fct>         <dbl>     <dbl>   <dbl>
##  1 bari          0.359     0.332  0.0273
##  2 bergamo       0.344     0.307  0.0365
##  3 cagliari      0.341     0.312  0.0283
##  4 Catanzaro     0.397     0.333  0.0641
##  5 firenze       0.355     0.320  0.0348
##  6 genova        0.329     0.315  0.0146
##  7 lecce         0.364     0.328  0.0358
##  8 milano        0.385     0.340  0.0453
##  9 napoli        0.346     0.333  0.0132
## 10 palermo       0.333     0.350 -0.0168
## 11 parma         0.346     0.318  0.0280
## 12 perugia       0.374     0.349  0.0257
## 13 roma          0.331     0.318  0.0127
## 14 torino        0.368     0.346  0.0225
## 15 venezia       0.316     0.304  0.0123
# or: take the difference within each city with diff().
# diff() subtracts the first row of each city from the second, so `d` has the
# opposite sign of the pivot_wider() version.
preasp %>%
  group_by(Pre, city) %>%
  summarise(m = mean(vc), .groups = "drop") %>%
  group_by(city) %>%
  summarise(d = diff(m), .groups = "drop")
## # A tibble: 15 × 2
##    city            d
##    <fct>       <dbl>
##  1 bari      -0.0273
##  2 bergamo   -0.0365
##  3 cagliari  -0.0283
##  4 Catanzaro -0.0641
##  5 firenze   -0.0348
##  6 genova    -0.0146
##  7 lecce     -0.0358
##  8 milano    -0.0453
##  9 napoli    -0.0132
## 10 palermo    0.0168
## 11 parma     -0.0280
## 12 perugia   -0.0257
## 13 roma      -0.0127
## 14 torino    -0.0225
## 15 venezia   -0.0123
# Boxplot of the city-wise differences `d`; the red horizontal line marks a
# difference of zero.
preasp %>%
  group_by(Pre, city) %>%
  summarise(m = mean(vc), .groups = "drop") %>%
  pivot_wider(names_from = Pre,
              values_from = m) %>%
  mutate(d = `-preasp` - `+preasp`) %>%
  ggplot() +
  aes(y = d) +
  geom_boxplot() +
  geom_hline(yintercept = 0, col = "red")

# Mean `dur` per Cons/Rate/Subj/V cell, using only rows with `Tense == "+"`
# and `V` equal to "I" or "A".
# `.groups = "drop"` returns an ungrouped tibble directly, so no trailing
# ungroup() is needed and summarise() does not print its grouping message.
vdata %>%
  filter(Tense == "+",
         V %in% c("I", "A")) %>%
  group_by(Cons, Rate, Subj, V) %>%
  summarise(m = mean(dur), .groups = "drop")
## # A tibble: 84 × 5
##    Cons  Rate  Subj  V         m
##    <fct> <fct> <fct> <fct> <dbl>
##  1 K     a     bk    A      223.
##  2 K     a     bk    I      175.
##  3 K     a     ck    A      198.
##  4 K     a     ck    I      158.
##  5 K     a     fs    A      256.
##  6 K     a     fs    I      203.
##  7 K     a     hp    A      214.
##  8 K     a     hp    I      167.
##  9 K     a     ht    A      253.
## 10 K     a     ht    I      198.
## # ℹ 74 more rows
# Difference in mean `dur` between the vowels "A" and "I" for every
# Cons/Rate/Subj cell: one column per vowel, then d = A - I.
vdata %>%
  filter(Tense == "+",
         V %in% c("A", "I")) %>%
  group_by(Cons, Rate, Subj, V) %>%
  summarise(m = mean(dur), .groups = "drop") %>%
  pivot_wider(names_from = V,
              values_from = m) %>%
  mutate(d = A - I)
## # A tibble: 42 × 6
##    Cons  Rate  Subj      A     I     d
##    <fct> <fct> <fct> <dbl> <dbl> <dbl>
##  1 K     a     bk     223.  175. 48.5 
##  2 K     a     ck     198.  158. 39.7 
##  3 K     a     fs     256.  203. 53.2 
##  4 K     a     hp     214.  167. 46.7 
##  5 K     a     ht     253.  198. 54.9 
##  6 K     a     mh     248.  216. 31.5 
##  7 K     a     ta     213   165. 47.9 
##  8 K     b     bk     185.  136. 48.6 
##  9 K     b     ck     147.  145.  1.70
## 10 K     b     fs     184.  163. 21.0 
## # ℹ 32 more rows
# or: take the difference within each Cons/Rate/Subj cell with diff(), which
# subtracts the first row of each cell from the second.
vdata %>%
  filter(Tense == "+",
         V %in% c("I", "A")) %>%
  group_by(Cons, Rate, Subj, V) %>%
  summarise(m = mean(dur), .groups = "drop") %>%
  group_by(Cons, Rate, Subj) %>%
  summarise(d = diff(m), .groups = "drop")
## # A tibble: 42 × 4
##    Cons  Rate  Subj       d
##    <fct> <fct> <fct>  <dbl>
##  1 K     a     bk    -48.5 
##  2 K     a     ck    -39.7 
##  3 K     a     fs    -53.2 
##  4 K     a     hp    -46.7 
##  5 K     a     ht    -54.9 
##  6 K     a     mh    -31.5 
##  7 K     a     ta    -47.9 
##  8 K     b     bk    -48.6 
##  9 K     b     ck     -1.70
## 10 K     b     fs    -21.0 
## # ℹ 32 more rows
# Die Dauer von `A` ist eindeutig höher,
# denn hier wird der Mittelwert
# von `A` von dem Mittelwert von `I`
# abgezogen (d = I - A), und das Ergebnis liegt
# tief unter 0 (Null).
# Boxplots of d = A - I, one panel per Rate/Cons combination; the horizontal
# line marks a difference of zero.
vdata %>%
  filter(Tense == "+",
         V %in% c("A", "I")) %>%
  group_by(Cons, Rate, Subj, V) %>%
  summarise(m = mean(dur), .groups = "drop") %>%
  pivot_wider(names_from = V,
              values_from = m) %>%
  mutate(d = A - I) %>%
  ggplot() +
  aes(y = d) +
  geom_boxplot() +
  facet_wrap(Rate ~ Cons) +
  geom_hline(yintercept = 0)

# or perhaps better: one panel per Rate, with Cons distinguished by colour
# inside each panel; the horizontal line marks a difference of zero.
vdata %>%
  filter(Tense == "+",
         V %in% c("A", "I")) %>%
  group_by(Cons, Rate, Subj, V) %>%
  summarise(m = mean(dur), .groups = "drop") %>%
  pivot_wider(names_from = V,
              values_from = m) %>%
  mutate(d = A - I) %>%
  ggplot() +
  aes(y = d, col = Cons) +
  geom_boxplot() +
  facet_wrap(~ Rate) +
  geom_hline(yintercept = 0)