## ---- echo=TRUE,results="hide",message=FALSE-----------------------------
# Load emuR and set up the demo database used throughout this script.
library(emuR)
# Write the emuR demo data into the session's temporary directory
# (the location returned by tempdir()).
create_emuRdemoData(dir = tempdir())
# Path to the "ae" demo emuDB inside the freshly created demo data.
path2ae = file.path(tempdir(), "emuR_demoData", "ae_emuDB")
# Load the database. FALSE is spelled out because the shorthand F is an
# ordinary variable that can be reassigned.
ae = load_emuDB(path2ae, verbose = FALSE)

## ------------------------------------------------------------------------
# Query all segments on the "Phonetic" level labelled i:, u: or E.
vowels = query(ae, query = "Phonetic==i:|u:|E")
# Extract the "fm" track for these segments.
# resultType is given explicitly: the trackdata-based functions applied to
# vowels_fm later in this script (dcut(), eplot(), dplot()) need a
# "trackdata" object, and the default result type of get_trackdata()
# differs between emuR versions.
vowels_fm = get_trackdata(ae,
                          seglist = vowels,
                          ssffTrackName = "fm",
                          resultType = "trackdata",
                          verbose = FALSE)
# show class of vowels_fm
class(vowels_fm)

## ------------------------------------------------------------------------
# Cut every track at the proportional midpoint (0.5) of its segment.
vowels_fm05 = dcut(vowels_fm, 0.5, prop = TRUE)

## ------------------------------------------------------------------------
# Ellipse plot of the first two columns of the midpoint values,
# grouped by the segment labels, with centroids.
eplot(vowels_fm05[, 1:2],
      label(vowels),
      centroid = TRUE,
      formant = TRUE)

## ------------------------------------------------------------------------
# Second track column over time, grouped by the segment labels ...
dplot(vowels_fm[, 2], label(vowels))
# ... and the same data, time-normalised and averaged.
dplot(vowels_fm[, 2],
      label(vowels),
      normalise = TRUE,
      average = TRUE)

## ------------------------------------------------------------------------
# Same extraction of the "fm" track, but requested as an "emuRtrackdata"
# object.
vowels_fm_new = get_trackdata(
  ae,
  seglist = vowels,
  ssffTrackName = "fm",
  resultType = "emuRtrackdata",
  verbose = FALSE
)
# show class of vowels_fm_new
class(vowels_fm_new)

# print the object itself
vowels_fm_new

# print its (column) names
names(vowels_fm_new)

## ------------------------------------------------------------------------
# if necessary, install.packages("ggplot2")
library(ggplot2)

# Base URL of the example data sets.
pfadu = "http://www.phonetik.uni-muenchen.de/~jmh/lehre/Rdf"
# stringsAsFactors = TRUE is set explicitly: since R 4.0.0 read.table()
# keeps text columns as character by default, but the rest of this script
# expects categorical columns to be factors (see the class() and levels()
# calls on coronal$Socialclass).
asp = read.table(file.path(pfadu, "asp.txt"), stringsAsFactors = TRUE)
coronal = read.table(file.path(pfadu, "coronal.txt"), stringsAsFactors = TRUE)
int.df = read.table(file.path(pfadu, "intdauer.txt"), stringsAsFactors = TRUE)
v.df = read.table(file.path(pfadu, "vdata.txt"), stringsAsFactors = TRUE)


# check class (data.frame or not):
class(asp)
# the first few lines:
head(coronal)


# 'ai[m,]' = row m
# 'ai[,m]' = column m
# You can use '$Name' to access column "Name"

#############################################################################
# 1. Numerical and categorical variables
############################################################################
# In a data.frame, columns can consist of numerical or categorical variables.
# In a matrix, you can only have one or the other class of variables.

# Numerical variables: continuous
# Access a column with '$' and ask for its class:
class(asp$d)
# or, equivalently, evaluate the expression inside the data frame:
with(asp, class(d))
# [1] "numeric"

class(int.df$Dauer)
# [1] "integer"

# Categorical variables are treated as factors, i.e. variables with two or
# more levels (categories). This is different from objects of the class
# "character".
class(coronal$Socialclass)
# [1] "factor"

# first 10 values
coronal$Socialclass[1:10]
# which levels does the factor have?
levels(coronal$Socialclass)

##########################################################
# 2. Typical example in phonetics
##########################################################
# Is there an influence of x on y?
# 
# 1. y = numerical, x = categorical
# 1.1 difference in duration in /i, e, a/ ?
# 1.2 = influence of x (=vowel) on y (=duration)?
# 1.3 possible geoms: geom_boxplot() 
# or: geom_histogram()  or stat_density()

# 2. y = categorical, x = categorical
# 2.1 words like Sohn, Sonne... can be produced either with /s/ or /z/.
# /s/ more likely in Bavaria or in Hamburg?
# 2.2 possible geom: geom_bar()

# 3. y = numerical, x = numerical 
# 3.1 bigger mouth opening related to a longer duration?
# 3.2 possible geom: geom_point(), geom_line()

## ------------------------------------------------------------------------
############################################################################
# 3. geom_boxplot(): y = numerical, x = categorical
############################################################################

head(asp)
# Does place of articulation (Kons) influence the duration of
# aspiration (d)?
# y: d    (numerical)
# x: Kons (categorical)

# ggplot() syntax: A + B + C + D + ...
# A, B, C, ... are modules. Here:
# A. data frame + B. variables + C. kind of plot
ggplot(asp) +
  aes(y = d, x = Kons) +
  geom_boxplot()

# The same plot, built module by module:
# A
p1 = ggplot(asp)
# B
p2 = aes(y = d, x = Kons)
# C
p3 = geom_boxplot()
# print A + B + C directly
p1 + p2 + p3
# or store A + B + C ...
erg = p1 + p2 + p3
# ... and show the figure
erg

# Reading a boxplot:
# thick line = median; box = interquartile range

## ------------------------------------------------------------------------
############################################################################
# 4. geom_bar(): y is categorical, x is categorical
############################################################################
head(coronal)
# Influence of region (Region) on place of articulation (Fr)?
# y: Fr (categorical)
# x: Region (categorical)

# data and variables
p1 = ggplot(coronal)
p2 = aes(fill = Fr, x = Region)
# three variants of the bar geom:
# frequencies of occurrence (default position)
p3 = geom_bar()
# bars placed side by side
p4 = geom_bar(position = "dodge")
# proportions
p5 = geom_bar(position = "fill")

# show the three plots in that order
p1 + p2 + p3
p1 + p2 + p4
p1 + p2 + p5

## ------------------------------------------------------------------------
############################################################################
# 5. geom_point(), geom_line(): y is numerical, x is numerical
############################################################################
# To what extent is duration (Dauer) influenced by intensity (dB) in the
# data frame int.df?
# y: Dauer (numerical)
# x: dB (numerical)
head(int.df)
# line only
ggplot(int.df) +
  aes(x = dB, y = Dauer) +
  geom_line()
# points only
ggplot(int.df, aes(x = dB, y = Dauer)) +
  geom_point()
# both
ggplot(int.df, aes(x = dB, y = Dauer)) +
  geom_line() +
  geom_point()

## ------------------------------------------------------------------------
############################################################################
# 6. + xlab() + ylab() + ggtitle()
############################################################################
# same boxplot as above
p1 = ggplot(asp) +
  aes(y = d, x = Kons) +
  geom_boxplot()
# label for x-axis
p2 = xlab("Place of Articulation")
# label for y-axis
p3 = ylab("Duration (ms)")
# title
p4 = ggtitle("Boxplot")
p1 + p2 + p3 + p4

# same bar chart as above
bar.p = ggplot(coronal) +
  aes(x = Region, fill = Fr) +
  geom_bar(position = "fill")
x.p = xlab("Region")
y.p = ylab("Proportion")
t.p = ggtitle("Proportional Distribution of Fricatives")
bar.p + x.p + y.p + t.p

############################################################################
# 7. Limits on axes: + xlim() + ylim()
############################################################################

# same scatter plot (geom_point()) as above
p1 = ggplot(int.df, aes(dB, Dauer)) +
  geom_point()
# limits of the x-axis
p2 = xlim(c(10, 60))
# limits of the y-axis
p3 = ylim(c(30, 280))
p1 + p2 + p3

# reverse the axes:
p4 = scale_x_reverse()
p5 = scale_y_reverse()

p1 + p4 + p5

## ------------------------------------------------------------------------
# list the names of the built-in colors
colors()

############################ geom_boxplot()
# default colors
ggplot(asp) +
  aes(y = d, x = Kons) +
  geom_boxplot()
# fill color mapped to Kons
ggplot(asp) +
  aes(y = d, x = Kons, fill = Kons) +
  geom_boxplot()
# line color mapped to Kons
ggplot(asp) +
  aes(y = d, x = Kons, col = Kons) +
  geom_boxplot()

# or choose your own colors
farben = c("green", "red")
# as fill colors
ggplot(asp) +
  aes(y = d, x = Kons) +
  geom_boxplot(fill = farben)
# as line colors
ggplot(asp) +
  aes(y = d, x = Kons) +
  geom_boxplot(col = farben)

############################ geom_bar()
##########
p1 = ggplot(coronal) +
  aes(x = Region, fill = Fr) +
  geom_bar()
p1
# choose your own colors
farben = c("yellow", "green")
p2 = scale_fill_manual(values = farben)
p1 + p2

## ------------------------------------------------------------------------
##########
ggplot(int.df, aes(x = dB, y = Dauer)) +
  geom_point() +
  geom_line()
# col: color.
# pch: plotting character.
# cex: character expansion; cex = 2 means 2 * standard size
ggplot(int.df, aes(x = dB, y = Dauer)) +
  geom_point(col = "purple", pch = 0, cex = 2) +
  geom_line(col = "pink")
# lwd: line width
ggplot(int.df, aes(x = dB, y = Dauer)) +
  geom_point(col = "purple", pch = 0, cex = 2) +
  geom_line(col = "pink", lwd = 2)

## ------------------------------------------------------------------------
# Default text size is 11 (legend: 10?) -- TODO confirm

p1 = ggplot(asp) +
  aes(y = d, x = Kons) +
  geom_boxplot() +
  xlab("Artikulationsstelle") +
  ylab("Dauer (ms)") +
  ggtitle("Boxplot-Daten")
p1

# set the `text` theme element to size 16
p16 = theme(text = element_text(size = 16))
p1 + p16

# change only the `axis.text` element
q24 = theme(axis.text = element_text(size = 24))
p1 + q24

# combine both: axis.text at 24, text at 30
p30 = theme(text = element_text(size = 30))
p1 + q24 + p30

## ------------------------------------------------------------------------
# create one boxplot panel per stress pattern (Bet: levels "be" and "un")
pf = facet_grid(~Bet)
p1 + pf

# or map Bet to color inside aes():
pc = ggplot(asp) +
  aes(y = d, x = Kons, col = Bet) +
  geom_boxplot() +
  xlab("Artikulationsstelle") +
  ylab("Dauer (ms)") +
  ggtitle("Boxplot-Daten")
pc

## ------------------------------------------------------------------------
# if necessary, install.packages("gridExtra")
library(gridExtra)

# three separate plots ...
p1 = ggplot(asp, aes(y = d, x = Kons)) +
  geom_boxplot()
p2 = ggplot(coronal) +
  aes(x = Region, fill = Fr) +
  geom_bar()
p3 = ggplot(int.df, aes(dB, Dauer)) +
  geom_line() +
  geom_point()
# ... arranged in a layout of one row and three columns
grid.arrange(p1, p2, p3, ncol = 3, nrow = 1)

## ---- eval=FALSE---------------------------------------------------------
## # see
## help(theme)

## ------------------------------------------------------------------------
p1 = ggplot(int.df, aes(dB, Dauer)) +
  geom_point()
# smoother using method "lm", with the standard-error display switched off
int.lm = geom_smooth(method = "lm", se = FALSE)
p1 + int.lm
# by default, geom_smooth() shows the standard error:
int.lmse = geom_smooth(method = "lm")
p1 + int.lmse

# this stat (here lm()) can be calculated separately for each facet,
# e.g. for each subject (Vpn)

p1 + int.lmse + facet_grid(~Vpn)



## ---- warning=FALSE------------------------------------------------------
# read the data set (path is relative to the working directory)
bat.df = read.table("Rgraphics/dataSets/bat.df.txt")
# p against steps as red points, one panel per participant
bat.plot = ggplot(bat.df) +
  aes(y = p, x = steps) +
  geom_point(col = "red") +
  facet_wrap(~participant) +
  ggtitle("bat")

# add listener-specific sigmoids (glm smoother with binomial family)
bat.plot +
  geom_smooth(method = "glm",
              se = FALSE,
              method.args = list(family = binomial))


## ------------------------------------------------------------------------
# stat_ellipse() layer with its default settings
ell = stat_ellipse()
# add the ellipse layer to the plot currently stored in p1
p1 + ell

## ------------------------------------------------------------------------
# read the data set (path is relative to the working directory)
td_mid = read.table("Rgraphics/dataSets/td_mid.txt")
p1 = ggplot(td_mid,
            aes(y = T1, x = T2, col = labels, label = labels))

# add the data points as text labels, defined by their value
p2 = geom_text()
p1 + p2
# further modules: ellipses, reversed axes, axis labels, no legend
p3 = stat_ellipse()
p4 = scale_y_reverse()
p5 = scale_x_reverse()
p6 = labs(x = "F2(Hz)", y = "F1(Hz)")
p7 = theme(legend.position = "none")

p1 + p2 + p3 + p4 + p5 + p6 + p7

# only ellipses (do NOT plot the data points)
p1 + p3 + p4 + p5 + p6 + p7

# plot the label-specific means of F1 and F2 (here: T1 and T2)
p2_centroid = geom_text(
  data = aggregate(cbind(T1, T2) ~ labels, data = td_mid, FUN = mean)
)
p1 + p2_centroid + p3 + p4 + p5 + p6 + p7

# btw, we could also vary the linetype
p1_alt = ggplot(
  td_mid,
  aes(y = T1, x = T2, col = labels, label = labels, linetype = labels)
)
p1_alt + p2_centroid + p3 + p4 + p5 + p6

## ------------------------------------------------------------------------
# T2 over times_rel: one line per segment-list row (sl_rowIdx),
# colored by label
ggplot(vowels_fm_new,
       aes(x = times_rel, y = T2, col = labels, group = sl_rowIdx)) +
  geom_line() +
  labs(x = "vowel duration (ms)", y = "F2 (Hz)")

## ------------------------------------------------------------------------
# read the data set (path is relative to the working directory)
td_norm = read.table("Rgraphics/dataSets/td_norm.txt")
# mean T2 for every combination of times_norm and labels, one line per label
ggplot(aggregate(T2 ~ times_norm + labels, data = td_norm, FUN = mean),
       aes(x = times_norm, y = T2, col = labels)) +
  geom_line() +
  labs(x = "vowel duration (normalized)", y = "F2 (Hz)")

