library(kableExtra)
## Warning: package 'kableExtra' was built under R version 3.6.2
# Read the ozone data from the working directory.
example <- read.csv(file = "ozoneNA.csv")
# Show the first four rows of five selected columns as a styled HTML table.
kable_styling(
  kbl(example[1:4, c("WindDirection", "maxO3", "T9", "Ne9", "Vx15")])
)
WindDirection | maxO3 | T9 | Ne9 | Vx15 |
---|---|---|---|---|
North | 87 | 15.6 | 4 | -0.6946 |
North | 82 | NA | 5 | -3.0000 |
East | 92 | 15.3 | 2 | 0.5209 |
North | 114 | 16.2 | 1 | -0.1736 |
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.2
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.2
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Use the black-and-white theme for this plot and every later one.
theme_set(theme_bw())

# Keep three columns of `economics`, restricted to the years 1967-1981.
df <- economics[
  lubridate::year(economics$date) %in% 1967:1981,
  c("date", "psavert", "uempmed")
]

# X axis: one break at every 12th row, labelled with that row's year.
brks <- df$date[seq(from = 1, to = nrow(df), by = 12)]
lbls <- lubridate::year(brks)

# Two series drawn from wide-format data; each line gets a constant colour
# key ("psavert" / "uempmed") that the manual colour scale turns into a hue.
ggplot(data = df, mapping = aes(x = date)) +
  geom_line(mapping = aes(y = psavert, colour = "psavert")) +
  geom_line(mapping = aes(y = uempmed, colour = "uempmed")) +
  scale_x_date(breaks = brks, labels = lbls) +
  scale_color_manual(
    name = "",
    values = c("psavert" = "#00ba38", "uempmed" = "#f8766d")
  ) +
  labs(
    title = "Time Series of Returns Percentage",
    subtitle = "Drawn From Wide Data format",
    caption = "Code's source: http://r-statistics.co/",
    y = "Returns %"
  ) +
  theme(panel.grid.minor = element_blank()) # hide the minor grid lines
library(ggExtra)
# Load the `mpg` data set shipped with ggplot2.
data(mpg, package = "ggplot2")
# mpg <- read.csv("http://goo.gl/uEeRGu")

# Scatterplot
theme_set(theme_bw()) # pre-set the bw theme.

# Rows with hwy >= 35 and cty > 27. Not referenced by the code visible here;
# kept in case later code relies on it.
mpg_select <- mpg[mpg$hwy >= 35 & mpg$cty > 27, ]

# Count plot of city vs highway mileage with a linear fit and no confidence
# band. `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
g <- ggplot(mpg, aes(cty, hwy)) +
  geom_count() +
  geom_smooth(method = "lm", se = FALSE)

# Add marginal histograms for both axes.
ggMarginal(g, type = "histogram", fill = "transparent")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
# ggMarginal(g, type = "density", fill="transparent")
library(ggthemes)
# Strongly penalise scientific notation so large numbers print in full
# (e.g. no 1e+40).
options(scipen = 999)

# Download the email campaign funnel data.
email_campaign_funnel <- read.csv(
  file = "https://raw.githubusercontent.com/selva86/datasets/master/email_campaign_funnel.csv"
)
# Breaks and labels for the Users axis (passed to scale_y_continuous() and
# drawn horizontally after coord_flip()): a break every 5 million from
# -15 million to +15 million, labelled by magnitude in millions ("15m" ... "0m"
# ... "15m") so both sides of the funnel read as positive counts.
brks <- seq(from = -15000000, to = 15000000, by = 5000000)
lbls <- paste0(abs(brks) / 1000000, "m")
# Funnel chart: one bar per Stage, filled by Gender, with the axes flipped so
# the bars run horizontally.
ggplot(
  data = email_campaign_funnel,
  mapping = aes(x = Stage, y = Users, fill = Gender)
) +
  geom_bar(stat = "identity", width = 0.6) +        # bar length = Users value
  coord_flip() +                                    # horizontal bars
  scale_y_continuous(breaks = brks, labels = lbls) +
  scale_fill_brewer(palette = "Dark2") +            # colour palette
  labs(title = "Email Campaign Funnel") +
  theme_tufte() +                                   # Tufte theme (ggthemes)
  theme(
    axis.ticks = element_blank(),                   # no tick marks
    plot.title = element_text(hjust = 0.5)          # centre the plot title
  )
library(gapminder)
library(gganimate)
## Warning: package 'gganimate' was built under R version 3.6.2
# Static bubble chart: GDP per capita (log scale) against life expectancy,
# point size mapped to population and colour to continent.
p <- ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, size = pop, colour = continent)
) +
  geom_point(alpha = 0.7) +
  scale_x_log10() +
  scale_size(range = c(2, 12)) +
  scale_color_viridis_d() +
  labs(x = "GDP per capita", y = "Life expectancy")
p

# Animated version: one panel per continent, stepping through `year`; the
# title shows the frame's time via the {frame_time} placeholder.
p +
  facet_wrap(~continent) +
  transition_time(year) +
  labs(title = "Year: {frame_time}")
# Source: https://www.datanovia.com/en/blog/gganimate-how-to-create-plots-with-beautiful-animation-in-r/
library(MASS)
# Empirical Pearson correlation between two independent standard-normal
# samples, computed once for each sample size in `Ns`.
Ns <- seq(2, 200, by = 10)

# Preallocate the result instead of growing it with c() on every iteration.
corr_coeff <- numeric(length(Ns))
for (i in seq_along(Ns)) {
  n <- Ns[i]
  x1 <- rnorm(n, mean = 0, sd = 1)
  x2 <- rnorm(n, mean = 0, sd = 1)
  corr_coeff[i] <- cor(x1, x2, method = "pearson")
}

# One row per sample size: `n` and the correlation observed at that size.
data_to_plot <- data.frame(
  "n" = Ns,
  "Correlation" = corr_coeff
)
# Plot the empirical correlation against the sample size. The x aesthetic maps
# the `n` column of `data_to_plot`, so the plot depends only on the data frame
# it is given and not on a same-length vector in the calling environment.
ggplot(data_to_plot, aes(x = n, y = Correlation)) +
  geom_point(color = "blue") +
  geom_line(color = "blue", alpha = 0.5) +
  theme_bw() +
  xlab("n") +
  ylab("Empirical correlation")
# Pearson correlation test on two independent standard-normal samples of
# size 5 (rnorm() defaults to mean 0 and sd 1).
x1 <- rnorm(5)
x2 <- rnorm(5)
res <- cor.test(x = x1, y = x2, method = "pearson")
res
##
## Pearson's product-moment correlation
##
## data: x1 and x2
## t = -2.0021, df = 3, p-value = 0.139
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.9827890 0.3786244
## sample estimates:
## cor
## -0.7562733
# Same test on two independent standard-normal samples of size 10000
# (rnorm() defaults to mean 0 and sd 1).
x1 <- rnorm(10000)
x2 <- rnorm(10000)
res <- cor.test(x = x1, y = x2, method = "pearson")
res
##
## Pearson's product-moment correlation
##
## data: x1 and x2
## t = 0.6022, df = 9998, p-value = 0.5471
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.01357917 0.02561955
## sample estimates:
## cor
## 0.006022504
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.6.2
# Simulate 30 observations for each of three groups (wide format, one column
# per group).
data <- data.frame(
  "control" = rnorm(30, mean = 7, sd = 1),
  "treatment1" = rnorm(30, mean = 8, sd = 1),
  "treatment2" = rnorm(30, mean = 8, sd = 2)
)

# Reshape to long format: `sample` holds the group name, `data` the value.
data <- pivot_longer(
  data,
  cols = c("control", "treatment1", "treatment2"),
  names_to = "sample",
  values_to = "data"
)

# Violin plot per group with jittered raw points and the group mean with its
# 95% normal-theory confidence interval.
# Arguments for the summary function must go through `fun.args`; passed
# directly to stat_summary() they are ignored with a warning. `width` is
# omitted because the default pointrange geom has no such parameter.
ggplot(data, aes(x = sample, y = data, color = sample)) +
  geom_violin() +
  geom_jitter(width = 0.2, alpha = 0.5) +
  xlab("") +
  ylab("Sleeping time (hours)") +
  stat_summary(fun.data = mean_cl_normal, fun.args = list(conf.int = 0.95))
## This version of Shiny is designed to work with 'htmlwidgets' >= 1.5.
## Please upgrade via install.packages('htmlwidgets').
# Simulate 100 observations for each of three groups.
control <- rnorm(100, mean = 7, sd = 1)
sample1 <- rnorm(100, mean = 8, sd = 1)
sample2 <- rnorm(100, mean = 8, sd = 2)

# Wide format (one column per group) ...
data <- data.frame(
  "control" = control,
  "treatment1" = sample1,
  "treatment2" = sample2
)

# ... reshaped to long format: `sample` holds the group name, `data` the value.
data <- pivot_longer(
  data,
  cols = c("control", "treatment1", "treatment2"),
  names_to = "sample",
  values_to = "data"
)

# Violin plot per group with faint jittered raw points and the group mean with
# its 95% normal-theory confidence interval.
# Arguments for the summary function must go through `fun.args`; passed
# directly to stat_summary() they are ignored with a warning. `width` is
# omitted because the default pointrange geom has no such parameter.
ggplot(data, aes(x = sample, y = data, color = sample)) +
  geom_violin() +
  geom_jitter(width = 0.2, alpha = 0.1) +
  xlab("") +
  ylab("Sleeping time (hours)") +
  stat_summary(fun.data = mean_cl_normal, fun.args = list(conf.int = 0.95))

# Two-sample t-test comparing the control group with the second treatment.
t.test(control, sample2)
##
## Welch Two Sample t-test
##
## data: control and sample2
## t = -5.013, df = 131.63, p-value = 0.000001695
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.5319768 -0.6650346
## sample estimates:
## mean of x mean of y
## 6.932230 8.030736
For the full example, see Susan Holmes's statistics book.
# Simulate a biased coin: 100 flips with probability 0.6 of heads ("H").
# The seed is fixed so the sequence is reproducible.
set.seed(123)
numFlips <- 100
probHead <- 0.6
coinFlips <- sample(
  x = c("H", "T"),
  size = numFlips,
  replace = TRUE,
  prob = c(probHead, 1 - probHead)
)
head(coinFlips)
## [1] "H" "T" "H" "T" "T" "H"