How to generate a table

library(kableExtra)

## Warning: package 'kableExtra' was built under R version 3.6.2

# Read the ozone data set and display the first four rows of a handful of
# columns as a styled table.
example <- read.csv("ozoneNA.csv")
shown_columns <- c("WindDirection", "maxO3", "T9", "Ne9", "Vx15")
kable_styling(kbl(example[1:4, shown_columns]))

WindDirection	maxO3	T9	Ne9	Vx15
North	87	15.6	4	-0.6946
North	82	NA	5	-3.0000
East	92	15.3	2	0.5209
North	114	16.2	1	-0.1736

Fancy plots seen in class

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.6.2

library(lubridate)

## Warning: package 'lubridate' was built under R version 3.6.2

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

theme_set(theme_bw())

# Keep the date and the two series of interest, restricted to 1967-1981.
in_window <- lubridate::year(economics$date) %in% 1967:1981
df <- economics[in_window, c("date", "psavert", "uempmed")]

# X axis ticks: every 12th observation, labelled with its year.
brks <- df$date[seq(1, nrow(df), by = 12)]
lbls <- lubridate::year(brks)

# One colour per series; the names match the `col` labels used in aes().
series_colours <- c("psavert" = "#00ba38", "uempmed" = "#f8766d")

# Draw both series as lines against the shared date axis.
ggplot(df, aes(x = date)) +
  geom_line(aes(y = psavert, col = "psavert")) +
  geom_line(aes(y = uempmed, col = "uempmed")) +
  labs(
    title = "Time Series of Returns Percentage",
    subtitle = "Drawn From Wide Data format",
    caption = "Code's source: http://r-statistics.co/",
    y = "Returns %"
  ) +
  scale_x_date(breaks = brks, labels = lbls) +
  scale_color_manual(name = "", values = series_colours) +
  theme(panel.grid.minor = element_blank())  # hide the minor grid lines


library(ggExtra)
data(mpg, package = "ggplot2")
# mpg <- read.csv("http://goo.gl/uEeRGu")

# Scatterplot of city vs highway mileage with marginal histograms.
theme_set(theme_bw())  # pre-set the bw theme.

# NOTE(review): mpg_select is computed but not used by the plot below;
# kept in case other code relies on it.
mpg_select <- mpg[mpg$hwy >= 35 & mpg$cty > 27, ]

# geom_count() sizes points by the number of overlapping observations.
# `se = FALSE` (not the reassignable alias `F`) turns off the confidence
# band around the fitted line.
g <- ggplot(mpg, aes(cty, hwy)) +
  geom_count() +
  geom_smooth(method = "lm", se = FALSE)

ggMarginal(g, type = "histogram", fill = "transparent")

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

# ggMarginal(g, type = "density", fill="transparent")


library(ggthemes)
options(scipen = 999)  # turn off scientific notation such as 1e+40

# Read data
email_campaign_funnel <- read.csv(
  "https://raw.githubusercontent.com/selva86/datasets/master/email_campaign_funnel.csv"
)

# Axis breaks every 5 million on both sides of zero; the labels show the
# magnitude in millions ("15m", "10m", "5m", "0m", ...), without the sign.
brks <- seq(-15000000, 15000000, 5000000)
lbls <- paste0(abs(brks) / 1000000, "m")

# Horizontal bar chart of Users per Stage, filled by Gender.
ggplot(email_campaign_funnel, aes(x = Stage, y = Users, fill = Gender)) +
  geom_bar(stat = "identity", width = .6) +
  scale_y_continuous(breaks = brks, labels = lbls) +
  scale_fill_brewer(palette = "Dark2") +  # colour palette
  coord_flip() +  # bars run horizontally
  labs(title = "Email Campaign Funnel") +
  theme_tufte() +  # Tufte theme (ggthemes is loaded above)
  theme(
    plot.title = element_text(hjust = .5),  # centre the plot title
    axis.ticks = element_blank()            # no axis tick marks
  )

library(gapminder)
library(gganimate)

## Warning: package 'gganimate' was built under R version 3.6.2

# Static bubble chart: life expectancy against GDP per capita (log scale),
# point size mapped to population and colour to continent.
p <- ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, size = pop, colour = continent)
) +
  geom_point(alpha = 0.7) +
  scale_color_viridis_d() +
  scale_size(range = c(2, 12)) +
  scale_x_log10() +
  labs(x = "GDP per capita", y = "Life expectancy")
p

# Animated version: one panel per continent, frames driven by `year`,
# with the current frame time shown in the title.
animated <- p +
  facet_wrap(~continent) +
  transition_time(year) +
  labs(title = "Year: {frame_time}")
animated

#https://www.datanovia.com/en/blog/gganimate-how-to-create-plots-with-beautiful-animation-in-r/

Small simulation for correlation coefficient

library(MASS)
# Sample sizes to simulate: 2, 12, 22, ..., 192.
Ns <- seq(2, 200, by = 10)

# For each sample size, draw two independent standard-normal samples and
# record their empirical Pearson correlation. vapply() allocates the result
# once and checks that every iteration returns a single number, instead of
# growing a vector with c() inside a loop. The random draws happen in the
# same order as before (x1 then x2, for each n in turn).
corr_coeff <- vapply(
  Ns,
  function(n) {
    x1 <- rnorm(n, mean = 0, sd = 1)
    x2 <- rnorm(n, mean = 0, sd = 1)
    cor(x1, x2, method = "pearson")
  },
  numeric(1)
)

# One row per sample size, ready for plotting.
data_to_plot <- data.frame("n" = Ns,
                           "Correlation" = corr_coeff)
# Empirical correlation against sample size. The x aesthetic refers to the
# `n` column of data_to_plot rather than the global vector `Ns`, so the plot
# depends only on the data frame it is given.
ggplot(data_to_plot, aes(x = n, y = Correlation)) +
  geom_point(color = "blue") +
  geom_line(color = "blue", alpha = 0.5) +
  theme_bw() +
  xlab("n") +
  ylab("Empirical correlation")

# Pearson correlation test on two independent standard-normal samples of
# size 5 (rnorm() defaults to mean 0, sd 1).
x1 <- rnorm(5)
x2 <- rnorm(5)
res <- cor.test(x = x1, y = x2, method = "pearson")
res

## 
##  Pearson's product-moment correlation
## 
## data:  x1 and x2
## t = -2.0021, df = 3, p-value = 0.139
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.9827890  0.3786244
## sample estimates:
##        cor 
## -0.7562733

# Same test as above, now with 10000 draws per sample
# (rnorm() defaults to mean 0, sd 1).
x1 <- rnorm(10000)
x2 <- rnorm(10000)
res <- cor.test(x = x1, y = x2, method = "pearson")
res

## 
##  Pearson's product-moment correlation
## 
## data:  x1 and x2
## t = 0.6022, df = 9998, p-value = 0.5471
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.01357917  0.02561955
## sample estimates:
##         cor 
## 0.006022504

Small simulation for somnifere

library(tidyr)

## Warning: package 'tidyr' was built under R version 3.6.2

# Simulate 30 sleeping times per group: a control group (mean 7, sd 1) and
# two treatment groups (mean 8 with sd 1, and mean 8 with sd 2).
data <- data.frame(
  "control" = rnorm(30, mean = 7, sd = 1),
  "treatment1" = rnorm(30, mean = 8, sd = 1),
  "treatment2" = rnorm(30, mean = 8, sd = 2)
)

# Long format: one row per observation, with the group name in `sample`
# and the simulated value in `data`.
data <- pivot_longer(
  data,
  cols = c("control", "treatment1", "treatment2"),
  names_to = "sample",
  values_to = "data"
)

# Violin plot per group with jittered raw points and a mean +/- 95%
# confidence interval summary. Arguments for the summary function must go
# through `fun.args`; passing `conf.int` (or `width`) directly to
# stat_summary() only triggers an "Ignoring unknown parameters" warning.
ggplot(data, aes(x = sample, y = data, color = sample)) +
  geom_violin() +
  geom_jitter(width = 0.2, alpha = 0.5) +
  xlab("") +
  ylab("Sleeping time (hours)") +
  stat_summary(fun.data = mean_cl_normal, fun.args = list(conf.int = 0.95))

## Warning: Ignoring unknown parameters: width, conf.int

## This version of Shiny is designed to work with 'htmlwidgets' >= 1.5.
##     Please upgrade via install.packages('htmlwidgets').

# Simulate 100 sleeping times per group: a control group (mean 7, sd 1) and
# two treatment groups (mean 8 with sd 1, and mean 8 with sd 2). The raw
# vectors are kept so they can be tested directly afterwards.
control <- rnorm(100, mean = 7, sd = 1)
sample1 <- rnorm(100, mean = 8, sd = 1)
sample2 <- rnorm(100, mean = 8, sd = 2)

data <- data.frame(
  "control" = control,
  "treatment1" = sample1,
  "treatment2" = sample2
)

# Long format: one row per observation, with the group name in `sample`
# and the simulated value in `data`.
data <- pivot_longer(
  data,
  cols = c("control", "treatment1", "treatment2"),
  names_to = "sample",
  values_to = "data"
)

# Violin plot per group with jittered raw points and a mean +/- 95%
# confidence interval summary. Arguments for the summary function must go
# through `fun.args`; passing `conf.int` (or `width`) directly to
# stat_summary() only triggers an "Ignoring unknown parameters" warning.
ggplot(data, aes(x = sample, y = data, color = sample)) +
  geom_violin() +
  geom_jitter(width = 0.2, alpha = 0.1) +
  xlab("") +
  ylab("Sleeping time (hours)") +
  stat_summary(fun.data = mean_cl_normal, fun.args = list(conf.int = 0.95))

## Warning: Ignoring unknown parameters: width, conf.int

# Two-sample t-test comparing the control draws with the second treatment
# draws; variances are not assumed equal (Welch), which is the default.
t.test(x = control, y = sample2, var.equal = FALSE)

## 
##  Welch Two Sample t-test
## 
## data:  control and sample2
## t = -5.013, df = 131.63, p-value = 0.000001695
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.5319768 -0.6650346
## sample estimates:
## mean of x mean of y 
##  6.932230  8.030736

Small simulation for coin flips

For the full example, see Susan Holmes's statistics book.

# Simulate 100 flips of a biased coin that lands heads with probability 0.6.
# The seed is fixed so the sequence is reproducible.
set.seed(123)
numFlips <- 100
probHead <- 0.6
coinFlips <- sample(
  x = c("H", "T"),
  size = numFlips,
  replace = TRUE,
  prob = c(probHead, 1 - probHead)
)
head(coinFlips)

## [1] "H" "T" "H" "T" "T" "H"

Class 1 - code

How to generate a table

Fancy plots seen in class

Small simulation for correlation coefficient

Small simulation for somnifere

Small simulation for coin flips