library(ggplot2)
# Source code for the illustrations shown in the plot tool.
#
# Each example below simulates a small data set and then draws one chart.
# No random seed is set, so every run produces slightly different pictures.
#
# NOTE(review): `fun.y`, `size` (for line widths) and `..level..` are kept as
# written; recent ggplot2 releases report them as deprecated (`fun`,
# `linewidth`, `after_stat(level)`) -- confirm the installed version first.
# NOTE(review): mean_sdl needs the Hmisc package and, with its defaults, spans
# mean +/- 2 SD, so the "with CI" examples presumably show SD bars rather than
# confidence intervals -- confirm intent.

# Shared theme fragment: removes the plot title and both axis titles.
blank_titles <- theme(
  plot.title = element_blank(),
  axis.title.x = element_blank(),
  axis.title.y = element_blank()
)

# DISTRIBUTION

# distribution 1-var, few data points
## histogram
ID <- 1:200
values <- rnorm(200, mean = 50, sd = 15)
df <- data.frame(ID, values)
ggplot(df, aes(values)) +
  geom_histogram(binwidth = 3) +
  blank_titles

## dotplot
ID <- 1:200
values <- rnorm(200, mean = 50, sd = 15)
df <- data.frame(ID, values)
ggplot(df, aes(values)) +
  geom_dotplot() +
  blank_titles

## freqpoly
ID <- 1:200
values <- rnorm(200, mean = 50, sd = 15)
df <- data.frame(ID, values)
ggplot(df, aes(values)) +
  geom_freqpoly(size = 2) +
  blank_titles

# distribution 1-var, many data points
## density distribution
ID <- 1:1000
values <- rnorm(1000, mean = 50, sd = 10)
df <- data.frame(ID, values)
ggplot(df, aes(values)) +
  geom_density(kernel = "gaussian", size = 2) +
  blank_titles

# distribution 1-var, many data points
## violin
# NOTE(review): x is the continuous ID column, so ggplot2 will likely warn
# about a continuous x aesthetic; aes(x = "", y = values) may be what was
# intended -- confirm before changing.
ID <- 1:1000
values <- rnorm(1000, mean = 50, sd = 10)
df <- data.frame(ID, values)
ggplot(df, aes(ID, values)) +
  geom_violin(trim = FALSE, width = .5, size = 2) +
  blank_titles

# distribution 2-var
## scatterplot
# ID now has one entry per simulated point. It used to be 1:400, which made
# data.frame() recycle the 200 values and draw every point twice.
ID <- 1:200
values1 <- rnorm(200, mean = 50, sd = 10)
values2 <- rnorm(200, mean = rep(c(20, 40), each = 100), sd = 10)
df <- data.frame(ID, values1, values2)
ggplot(df, aes(values1, values2)) +
  geom_point(size = 2) +
  blank_titles

# distribution 2-var
## hexbin
ID <- 1:4000
values1 <- rnorm(4000, mean = 50, sd = 10)
values2 <- rnorm(4000, mean = 5, sd = 1)
df <- data.frame(ID, values1, values2)
ggplot(df, aes(values1, values2)) +
  geom_hex(bins = 70) +
  blank_titles

# distribution 2-var
## 2d histogram
ID <- 1:4000
values1 <- rnorm(4000, mean = 50, sd = 10)
values2 <- rnorm(4000, mean = 5, sd = 1)
df <- data.frame(ID, values1, values2)
ggplot(df, aes(values1, values2)) +
  geom_bin2d(bins = 70) +
  blank_titles

# distribution 2-var
## 2d density
ID <- 1:4000
values1 <- rnorm(4000, mean = 50, sd = 10)
values2 <- rnorm(4000, mean = 5, sd = 1)
df <- data.frame(ID, values1, values2)
ggplot(df, aes(values1, values2)) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon") +
  blank_titles

# distribution 2-var
## jitter
ID <- 1:600
values <- rnorm(
  600,
  mean = rep(c(50, 80, 30), each = 200),
  sd = rep(c(15, 10, 5), each = 200)
)
category <- rep(c("A", "B", "C"), each = 200)
df <- data.frame(ID, category, values)
ggplot(df, aes(category, values)) +
  geom_jitter(width = .1) +
  blank_titles

# distribution 2-var
## violin
ID <- 1:600
values <- rnorm(
  600,
  mean = rep(c(50, 80, 30), each = 200),
  sd = rep(c(15, 10, 5), each = 200)
)
category <- rep(c("A", "B", "C"), each = 200)
df <- data.frame(ID, category, values)
ggplot(df, aes(category, values)) +
  geom_violin(trim = FALSE, width = .8) +
  blank_titles

##############################################################################
# COMPARISON

# comparison few items, few categories
## simple bar chart
ID <- 1:600
values <- rnorm(
  600,
  mean = rep(c(25, 30, 15, 40), each = 150),
  sd = rep(c(5, 4, 3, 5), each = 150)
)
category <- rep(c("A", "B", "C", "D"), each = 150)
df <- data.frame(ID, category, values)
ggplot(df, aes(category, values)) +
  geom_bar(stat = "summary", fun.y = "mean") +
  blank_titles

## simple bar chart with error bars
ID <- 1:600
values <- rnorm(
  600,
  mean = rep(c(25, 30, 15, 40), each = 150),
  sd = rep(c(5, 4, 3, 5), each = 150)
)
category <- rep(c("A", "B", "C", "D"), each = 150)
df <- data.frame(ID, category, values)
ggplot(df, aes(category, values)) +
  geom_bar(stat = "summary", fun.y = "mean") +
  stat_summary(
    fun.data = mean_sdl, geom = "errorbar", colour = "red",
    width = .3, size = 1.5
  ) +
  blank_titles

## box plot
ID <- 1:600
values <- rnorm(
  600,
  mean = rep(c(25, 30, 15, 40), each = 150),
  sd = rep(c(5, 4, 3, 5), each = 150)
)
category <- rep(c("A", "B", "C", "D"), each = 150)
df <- data.frame(ID, category, values)
ggplot(df, aes(category, values)) +
  geom_boxplot() +
  blank_titles

## clustered bar chart
ID <- 1:400
values <- rnorm(
  400,
  mean = rep(c(25, 30, 15, 40), each = 100),
  sd = rep(c(5, 4, 3, 5), each = 100)
)
category1 <- rep(c("A", "B", "C", "D"), each = 100)
undeux <- c("1", "2")
category2 <- rep(undeux, 200)
df <- data.frame(ID, category1, category2, values)
ggplot(df, aes(category1, values, fill = category2)) +
  stat_summary(
    fun.y = mean, geom = "bar", position = position_dodge(width = 0.90)
  ) +
  blank_titles

## clustered bar chart with CI
ID <- 1:400
values <- rnorm(
  400,
  mean = rep(c(25, 30, 15, 40), each = 100),
  sd = rep(c(5, 4, 3, 5), each = 100)
)
category1 <- rep(c("A", "B", "C", "D"), each = 100)
undeux <- c("1", "2")
category2 <- rep(undeux, 200)
df <- data.frame(ID, category1, category2, values)
ggplot(df, aes(category1, values, fill = category2)) +
  stat_summary(
    fun.y = mean, geom = "bar", position = position_dodge(width = 0.90)
  ) +
  stat_summary(
    fun.data = mean_sdl, width = .3, position = position_dodge(0.90),
    geom = "errorbar", size = 1.5, color = "#666666"
  ) +
  blank_titles

# comparison many items, few categories
## clustered bar chart
ID <- 1:400
values <- rnorm(
  400,
  mean = rep(c(25, 30, 15, 40), each = 100),
  sd = rep(c(5, 4, 3, 5), each = 100)
)
category1 <- rep(c("A", "B", "C", "D"), each = 100)
undeux <- c("1", "2", "3", "4")
category2 <- rep(undeux, 100)
df <- data.frame(ID, category1, category2, values)
ggplot(df, aes(category1, values, fill = category2)) +
  stat_summary(
    fun.y = mean, geom = "bar", position = position_dodge(width = 0.90)
  ) +
  scale_fill_viridis_d() +
  blank_titles

## clustered bar chart with CI
ID <- 1:400
values <- rnorm(
  400,
  mean = rep(c(25, 30, 15, 40), each = 100),
  sd = rep(c(5, 4, 3, 5), each = 100)
)
category1 <- rep(c("A", "B", "C", "D"), each = 100)
undeux <- c("1", "2", "3", "4")
category2 <- rep(undeux, 100)
df <- data.frame(ID, category1, category2, values)
ggplot(df, aes(category1, values, fill = category2)) +
  stat_summary(
    fun.y = mean, geom = "bar", position = position_dodge(width = 0.90)
  ) +
  stat_summary(
    fun.data = mean_sdl, width = .3, position = position_dodge(0.90),
    geom = "errorbar", size = 1.5, color = "#666666"
  ) +
  blank_titles

## horizontal clustered bar chart
ID <- 1:400
values <- rnorm(
  400,
  mean = rep(c(25, 30, 15, 40), each = 100),
  sd = rep(c(5, 4, 3, 5), each = 100)
)
category1 <- rep(c("A", "B", "C", "D"), each = 100)
undeux <- c("1", "2", "3", "4")
# Shared theme fragment: removes the plot title and both axis titles.
blank_titles <- theme(
  plot.title = element_blank(),
  axis.title.x = element_blank(),
  axis.title.y = element_blank()
)

# (continuation of the "horizontal clustered bar chart" example: ID, values,
# category1 and undeux are assigned immediately before this point)
category2 <- rep(undeux, 100)
df <- data.frame(ID, category1, category2, values)
ggplot(df, aes(category1, values, fill = category2)) +
  stat_summary(
    fun.y = mean, geom = "bar", position = position_dodge(width = 0.90)
  ) +
  scale_fill_viridis_d() +
  coord_flip() +
  blank_titles

## horizontal clustered bar chart with CI
ID <- 1:400
values <- rnorm(
  400,
  mean = rep(c(25, 30, 15, 40), each = 100),
  sd = rep(c(5, 4, 3, 5), each = 100)
)
category1 <- rep(c("A", "B", "C", "D"), each = 100)
undeux <- c("1", "2", "3", "4")
category2 <- rep(undeux, 100)
df <- data.frame(ID, category1, category2, values)
ggplot(df, aes(category1, values, fill = category2)) +
  stat_summary(
    fun.y = mean, geom = "bar", position = position_dodge(width = 0.90)
  ) +
  stat_summary(
    fun.data = mean_sdl, width = .3, position = position_dodge(0.90),
    geom = "errorbar", size = 1.5, color = "#666666"
  ) +
  coord_flip() +
  blank_titles

## grid of clustered bar charts
# NOTE(review): in the grid examples category2 and category3 both cycle row by
# row, so they move in lockstep: each facet only contains a subset of the
# category2 levels (e.g. "first" only gets "1" and "3" here). Confirm whether
# a full crossing was intended.
ID <- 1:400
values <- rnorm(
  400,
  mean = rep(c(25, 30, 15, 40), each = 100),
  sd = rep(c(5, 4, 3, 5), each = 100)
)
category1 <- rep(c("A", "B", "C", "D"), each = 100)
undeuxtroisquatre <- c("1", "2", "3", "4")
firstsecond <- c("first", "second")
category2 <- rep(undeuxtroisquatre, 100)
category3 <- rep(firstsecond, 200)
df <- data.frame(ID, category1, category2, category3, values)
ggplot(df, aes(category1, values, fill = category2)) +
  stat_summary(
    fun.y = mean, geom = "bar", position = position_dodge(width = 0.90)
  ) +
  facet_wrap(~ category3, ncol = 2) +
  scale_fill_viridis_d() +
  blank_titles

## grid of clustered bar charts with CI
ID <- 1:400
values <- rnorm(
  400,
  mean = rep(c(25, 30, 15, 40), each = 100),
  sd = rep(c(5, 4, 3, 5), each = 100)
)
category1 <- rep(c("A", "B", "C", "D"), each = 100)
undeuxtroisquatre <- c("1", "2", "3", "4")
firstsecond <- c("first", "second")
category2 <- rep(undeuxtroisquatre, 100)
category3 <- rep(firstsecond, 200)
df <- data.frame(ID, category1, category2, category3, values)
ggplot(df, aes(category1, values, fill = category2)) +
  stat_summary(
    fun.y = mean, geom = "bar", position = position_dodge(width = 0.90)
  ) +
  stat_summary(
    fun.data = mean_sdl, width = .3, position = position_dodge(0.90),
    geom = "errorbar", size = 1.5, color = "#666666"
  ) +
  facet_wrap(~ category3, ncol = 2) +
  blank_titles

## clustered boxplots
ID <- 1:400
values <- rnorm(
  400,
  mean = rep(c(25, 30, 15, 40), each = 100),
  sd = rep(c(5, 4, 3, 5), each = 100)
)
category1 <- rep(c("A", "B", "C", "D"), each = 100)
undeuxtroisquatre <- c("1", "2", "3", "4")
category2 <- rep(undeuxtroisquatre, 100)
df <- data.frame(ID, category1, category2, values)
ggplot(df, aes(category1, values, fill = category2)) +
  geom_boxplot() +
  scale_fill_viridis_d() +
  blank_titles

## clustered boxplots
ID <- 1:400
values <- rnorm(
  400,
  mean = rep(c(25, 30, 15, 40), each = 100),
  sd = rep(c(5, 4, 3, 5), each = 100)
)
category1 <- rep(c("A", "B", "C", "D"), each = 100)
# One label per row. This used to be rep(..., 100), i.e. 200 labels that
# data.frame() silently recycled to 400 rows; the resulting column is the same.
category2 <- rep(c("1", "2"), 200)
df <- data.frame(ID, category1, category2, values)
ggplot(df, aes(category1, values, fill = category2)) +
  geom_boxplot() +
  blank_titles
head(df)

## grid of boxplots
ID <- 1:400
values <- rnorm(
  400,
  mean = rep(c(25, 30, 15, 40), each = 100),
  sd = rep(c(5, 4, 3, 5), each = 100)
)
category1 <- rep(c("A", "B", "C", "D"), each = 100)
undeuxtroisquatre <- c("1", "2", "3", "4")
firstsecond <- c("first", "second")
category2 <- rep(undeuxtroisquatre, 100)
category3 <- rep(firstsecond, 200)
df <- data.frame(ID, category1, category2, category3, values)
ggplot(df, aes(category1, values, fill = category2)) +
  geom_boxplot() +
  facet_wrap(~ category3, ncol = 2) +
  scale_fill_viridis_d() +
  blank_titles

# comparison many items, many categories
## grid of clustered box plots
ID <- 1:800
values <- rnorm(
  800,
  mean = rep(c(25, 30, 15, 40), each = 200),
  sd = rep(c(5, 4, 3, 5), each = 200)
)
category1 <- rep(c("A", "B", "C", "D"), each = 200)
undeuxtroisquatre <- c("1", "2", "3", "4", "5", "6", "7", "8")
firstsecond <- c("first", "second", "third", "fourth")
category2 <- rep(undeuxtroisquatre, 100)
category3 <- rep(firstsecond, 200)
df <- data.frame(ID, category1, category2, category3, values)
ggplot(df, aes(category1, values, fill = category2)) +
  geom_boxplot() +
  facet_wrap(~ category3, ncol = 2) +
  scale_fill_viridis_d() +
  blank_titles

## grid of clustered bar charts
ID <- 1:800
values <- rnorm(
  800,
  mean = rep(c(25, 30, 15, 40), each = 200),
  sd = rep(c(5, 4, 3, 5), each = 200)
)
category1 <- rep(c("A", "B", "C", "D"), each = 200)
undeuxtroisquatre <- c("1", "2", "3", "4", "5", "6", "7", "8")
firstsecond <- c("first", "second", "third", "fourth")
category2 <- rep(undeuxtroisquatre, 100)
category3 <- rep(firstsecond, 200)
df <- data.frame(ID, category1, category2, category3, values)
ggplot(df, aes(category1, values, fill = category2)) +
  stat_summary(
    fun.y = mean, geom = "bar", position = position_dodge(width = 0.90)
  ) +
  facet_wrap(~ category3, ncol = 2) +
  scale_fill_viridis_d() +
  blank_titles

## grid of clustered bar charts with CI
ID <- 1:800
values <- rnorm(
  800,
  mean = rep(c(25, 30, 15, 40), each = 200),
  sd = rep(c(5, 4, 3, 5), each = 200)
)
category1 <- rep(c("A", "B", "C", "D"), each = 200)
undeuxtroisquatre <- c("1", "2", "3", "4", "5", "6", "7", "8")
firstsecond <- c("first", "second", "third", "fourth")
category2 <- rep(undeuxtroisquatre, 100)
category3 <- rep(firstsecond, 200)
df <- data.frame(ID, category1, category2, category3, values)
ggplot(df, aes(category1, values, fill = category2)) +
  stat_summary(
    fun.y = mean, geom = "bar", position = position_dodge(width = 0.90)
  ) +
  stat_summary(
    fun.data = mean_sdl, width = .3, position = position_dodge(0.90),
    geom = "errorbar", size = 1.5, color = "#666666"
  ) +
  facet_wrap(~ category3, ncol = 2) +
  blank_titles

# comparison few periods, few categories
## simple bar chart
ID <- 1:600
values <- rnorm(
  600,
  mean = rep(c(25, 30, 35, 40), each = 150),
  sd = rep(c(5, 4, 3, 5), each = 150)
)
year <- rep(c("2015", "2016", "2017", "2018"), each = 150)
# The data frame now carries `year`, the column the plot maps. It used to be
# built from a leftover `category` vector of an earlier example, and the plot
# only worked because `year` was also found in the global environment.
df <- data.frame(ID, year, values)
ggplot(df, aes(year, values)) +
  geom_bar(stat = "summary", fun.y = "mean") +
  blank_titles

## simple bar chart with error bars
ID <- 1:600
values <- rnorm(
  600,
  mean = rep(c(25, 30, 35, 40), each = 150),
  sd = rep(c(5, 4, 3, 5), each = 150)
)
year <- rep(c("2015", "2016", "2017", "2018"), each = 150)
df <- data.frame(ID, year, values)
ggplot(df, aes(year, values)) +
  geom_bar(stat = "summary", fun.y = "mean") +
  stat_summary(
    fun.data = mean_sdl, geom = "errorbar", colour = "red",
    width = .3, size = 1.5
  ) +
  blank_titles

## clustered bar chart
ID <- 1:400
values <- rnorm(
  400,
  mean = rep(c(25, 30, 35, 40), each = 100),
  sd = rep(c(5, 4, 3, 5), each = 100)
)
year <- rep(c("2015", "2016", "2017", "2018"), each = 100)
undeux <- c("1", "2")
category <- rep(undeux, 200)
df <- data.frame(ID, year, category, values)
ggplot(df, aes(year, values, fill = category)) +
  stat_summary(
    fun.y = mean, geom = "bar", position = position_dodge(width = 0.90)
  ) +
  blank_titles

## clustered bar chart with CI
ID <- 1:400
values <- rnorm(
  400,
  mean = rep(c(25, 30, 35, 40), each = 100),
  sd = rep(c(5, 4, 3, 5), each = 100)
)
year <- rep(c("2015", "2016", "2017", "2018"), each = 100)
undeux <- c("1", "2")
category <- rep(undeux, 200)
df <- data.frame(ID, year, category, values)
ggplot(df, aes(year, values, fill = category)) +
  stat_summary(
    fun.y = mean, geom = "bar", position = position_dodge(width = 0.90)
  ) +
  stat_summary(
    fun.data = mean_sdl, width = .3, position = position_dodge(0.90),
    geom = "errorbar", size = 1.5, color = "#666666"
  ) +
  blank_titles

# comparison few periods, many categories
## clustered bar chart
ID <- 1:400
values <- rnorm(
  400,
  mean = rep(c(25, 30, 35, 40), each = 100),
  sd = rep(c(5, 4, 3, 5), each = 100)
)
year <- rep(c("2015", "2016", "2017", "2018"), each = 100)
unhuit <- c("1", "2", "3", "4", "5", "6", "7", "8")
category <- rep(unhuit, 50)
df <- data.frame(ID, year, category, values)
ggplot(df, aes(year, values, fill = category)) +
  stat_summary(
    fun.y = mean, geom = "bar", position = position_dodge(width = 0.90)
  ) +
  blank_titles

## clustered bar chart with CI
ID <- 1:400
values <- rnorm(
  400,
  mean = rep(c(25, 30, 35, 40), each = 100),
  sd = rep(c(5, 4, 3, 5), each = 100)
)
year <- rep(c("2015", "2016", "2017", "2018"), each = 100)
unhuit <- c("1", "2", "3", "4", "5", "6", "7", "8")
category <- rep(unhuit, 50)
df <- data.frame(ID, year, category, values)
# Shared theme fragment: removes the plot title and both axis titles.
blank_titles <- theme(
  plot.title = element_blank(),
  axis.title.x = element_blank(),
  axis.title.y = element_blank()
)

# Fill colours shared by the eight-level blood-type charts below.
blood_palette <- c(
  "#40BFBF", "#D1D075", "#2A7E5D", "#97C34B",
  "#2A6D7E", "#7DD4A8", "#D5B081", "#B6E7E0"
)

# (plot for the "clustered bar chart with CI" example whose data frame `df` is
# built immediately before this point)
ggplot(df, aes(year, values, fill = category)) +
  stat_summary(
    fun.y = mean, geom = "bar", position = position_dodge(width = 0.90)
  ) +
  stat_summary(
    fun.data = mean_sdl, width = .3, position = position_dodge(0.90),
    geom = "errorbar", size = 1.5, color = "#666666"
  ) +
  blank_titles

## line chart
ID <- 1:400
values <- rnorm(
  400,
  mean = rep(c(25, 30, 32, 40), each = 100),
  sd = rep(c(5, 4, 3, 5), each = 100)
)
year <- rep(c("2015", "2016", "2017", "2018"), each = 100)
unquatre <- c("1", "2", "3", "4")
category <- rep(unquatre, 100)
df <- data.frame(ID, year, category, values)
ggplot(df, aes(x = year, y = values, group = category)) +
  stat_summary(
    aes(color = category, linetype = category),
    fun.y = mean, size = 1.5, position = position_dodge(.15), geom = "line"
  ) +
  geom_point(
    aes(color = category),
    size = 3, stat = "summary", fun.y = mean, position = position_dodge(.15)
  ) +
  scale_color_viridis_d() +
  blank_titles

# comparison many periods, non-cyclic data
## clustered bar chart
# month.abb is base R's "Jan" ... "Dec" vector; it is also passed as the axis
# limits so the months stay in calendar order instead of alphabetical order.
ID <- 1:1200
values <- rnorm(
  1200,
  mean = rep(c(25, 30, 35, 40, 35, 30, 35, 35, 25, 30, 35, 20), each = 100),
  sd = rep(c(5, 4, 3, 5, 5, 4, 3, 5, 5, 4, 3, 5), each = 100)
)
month <- rep(month.abb, each = 100)
undeux <- c("1", "2")
category <- rep(undeux, 600)
df <- data.frame(ID, month, category, values)
ggplot(df, aes(month, values, fill = category)) +
  stat_summary(
    fun.y = mean, geom = "bar", position = position_dodge(width = 0.90)
  ) +
  scale_x_discrete(limits = month.abb) +
  blank_titles

## line chart
ID <- 1:1200
values <- rnorm(
  1200,
  mean = rep(c(25, 30, 35, 40, 35, 30, 35, 35, 25, 30, 35, 20), each = 100),
  sd = rep(c(5, 4, 3, 5, 5, 4, 3, 5, 5, 4, 3, 5), each = 100)
)
month <- rep(month.abb, each = 100)
undeux <- c("1", "2")
category <- rep(undeux, 600)
df <- data.frame(ID, month, category, values)
ggplot(df, aes(x = month, y = values, group = category)) +
  stat_summary(
    aes(color = category, linetype = category),
    fun.y = mean, size = 1.5, position = position_dodge(.15), geom = "line"
  ) +
  geom_point(
    aes(color = category),
    size = 3, stat = "summary", fun.y = mean, position = position_dodge(.15)
  ) +
  scale_x_discrete(limits = month.abb) +
  blank_titles

# comparison many periods, cyclic data
## circular line chart
ID <- 1:1200
values <- rnorm(
  1200,
  mean = rep(c(25, 30, 35, 40, 35, 30, 35, 35, 25, 30, 35, 20), each = 100),
  sd = rep(c(5, 4, 3, 5, 5, 4, 3, 5, 5, 4, 3, 5), each = 100)
)
month <- rep(month.abb, each = 100)
undeux <- c("1", "2")
category <- rep(undeux, 600)
df <- data.frame(ID, month, category, values)
# The `+` before the theme was missing, so the theme was evaluated as a
# separate statement and never applied to this plot.
ggplot(df, aes(x = month, y = values, group = category)) +
  stat_summary(
    aes(color = category, linetype = category),
    fun.y = mean, size = 1.5, position = position_dodge(.15), geom = "line"
  ) +
  geom_point(
    aes(color = category),
    size = 3, stat = "summary", fun.y = mean, position = position_dodge(.15)
  ) +
  scale_x_discrete(limits = month.abb) +
  coord_polar() +
  ylim(0, 55) +
  scale_color_viridis_d() +
  blank_titles

## circular bar chart (clustered)
ID <- 1:1200
values <- rnorm(
  1200,
  mean = rep(c(25, 30, 35, 40, 35, 30, 35, 35, 25, 30, 35, 20), each = 100),
  sd = rep(c(5, 4, 3, 5, 5, 4, 3, 5, 5, 4, 3, 5), each = 100)
)
month <- rep(month.abb, each = 100)
undeux <- c("1", "2")
category <- rep(undeux, 600)
df <- data.frame(ID, month, category, values)
ggplot(df, aes(month, values, fill = category)) +
  stat_summary(
    fun.y = mean, geom = "bar", position = position_dodge(width = 0.90)
  ) +
  scale_x_discrete(limits = month.abb) +
  ylim(-60, 55) +
  coord_polar() +
  blank_titles

##############################################################################
# RELATIONSHIP

# relationship 2-var
## scatter plot
ID <- 1:300
values1 <- rnorm(300, mean = rep(c(50, 75, 100), each = 100), sd = 15)
values2 <- rnorm(300, mean = rep(c(20, 30, 40), each = 100), sd = 10)
df <- data.frame(ID, values1, values2)
ggplot(df, aes(values1, values2)) +
  geom_point(size = 2) +
  blank_titles

## scatter plot with loess curve fitting
ID <- 1:300
values1 <- rnorm(300, mean = rep(c(50, 75, 100), each = 100), sd = 15)
values2 <- rnorm(300, mean = rep(c(20, 30, 40), each = 100), sd = 10)
df <- data.frame(ID, values1, values2)
ggplot(df, aes(values1, values2)) +
  geom_point(size = 2) +
  geom_smooth() +
  blank_titles

## scatter plot with regression line
ID <- 1:300
values1 <- rnorm(300, mean = rep(c(50, 75, 100), each = 100), sd = 15)
values2 <- rnorm(300, mean = rep(c(20, 30, 40), each = 100), sd = 10)
df <- data.frame(ID, values1, values2)
ggplot(df, aes(values1, values2)) +
  geom_point(size = 2) +
  geom_smooth(method = "lm") +
  blank_titles

# relationship 3-var
## scatter plot with color scale
ID <- 1:300
values1 <- rnorm(300, mean = rep(c(50, 75, 100), each = 100), sd = 15)
values2 <- rnorm(300, mean = rep(c(20, 30, 40), each = 100), sd = 10)
values3 <- rnorm(300, mean = rep(c(2, 3, 4), each = 100), sd = 1)
df <- data.frame(ID, values1, values2, values3)
ggplot(df, aes(values1, values2)) +
  geom_point(aes(color = values3), size = 2) +
  blank_titles

## bubble plot
ID <- 1:300
values1 <- rnorm(300, mean = rep(c(50, 75, 100), each = 100), sd = 15)
values2 <- rnorm(300, mean = rep(c(20, 30, 40), each = 100), sd = 10)
values3 <- rnorm(300, mean = rep(c(2, 5, 8), each = 100), sd = 1)
df <- data.frame(ID, values1, values2, values3)
ggplot(df, aes(values1, values2)) +
  geom_point(aes(size = values3), alpha = .3) +
  blank_titles

##############################################################################
# PART OF A WHOLE

# static, simple
## pie chart
type <- c("O+", "A+", "B+", "AB+", "O-", "A-", "B-", "AB-")
proportion <- c(33.2, 41.6, 6.8, 3.4, 5.8, 7.4, 1.2, 0.6)
df <- data.frame(type, proportion)
ggplot(df, aes(x = "", y = proportion, fill = type)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = blood_palette) +
  coord_polar("y") +
  theme_void()

## pie chart
type <- c("O+", "A+", "B+", "AB+", "O-", "A-", "B-", "AB-")
proportion <- c(33.2, 41.6, 6.8, 3.4, 5.8, 7.4, 1.2, 0.6)
df <- data.frame(type, proportion)
ggplot(df, aes(x = "", y = proportion, fill = type)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  coord_polar("y") +
  theme_void()

## stacked bars
type <- rep(c("O+", "A+", "B+", "AB+", "O-", "A-", "B-", "AB-"), 3)
country <- c(rep("Norway", 8), rep("France", 8), rep("UK", 8))
Norway <- c(33.2, 41.6, 6.8, 3.4, 5.8, 7.4, 1.2, 0.6)
France <- c(36.0, 37.0, 9.0, 3.0, 6.0, 6.0, 2.0, 1.0)
UK <- c(37.0, 31.0, 8.0, 3.0, 11.0, 7.0, 2.0, 1.0)
percentage <- c(Norway, France, UK)
df <- data.frame(type, country, percentage)
ggplot(df, aes(x = country, y = percentage, fill = type)) +
  geom_bar(width = 0.5, stat = "identity") +
  scale_fill_manual(values = blood_palette) +
  blank_titles

## stacked bars
type <- rep(c("O+", "A+", "B+", "AB+", "O-", "A-", "B-", "AB-"), 3)
country <- c(rep("Norway", 8), rep("France", 8), rep("UK", 8))
Norway <- c(33.2, 41.6, 6.8, 3.4, 5.8, 7.4, 1.2, 0.6)
France <- c(36.0, 37.0, 9.0, 3.0, 6.0, 6.0, 2.0, 1.0)
UK <- c(37.0, 31.0, 8.0, 3.0, 11.0, 7.0, 2.0, 1.0)
percentage <- c(Norway, France, UK)
df <- data.frame(type, country, percentage)
ggplot(df, aes(x = country, y = percentage, fill = type)) +
  geom_bar(width = 0.5, stat = "identity") +
  scale_fill_viridis_d() +
  blank_titles

## ring plot
# x = 2 together with xlim(0.5, 2.5) leaves empty space below the bar, which
# coord_polar() turns into the hole of the ring.
type <- c("O+", "A+", "B+", "AB+", "O-", "A-", "B-", "AB-")
proportion <- c(33.2, 41.6, 6.8, 3.4, 5.8, 7.4, 1.2, 0.6)
df <- data.frame(type, proportion)
ggplot(df, aes(x = 2, y = proportion, fill = type)) +
  geom_bar(stat = "identity") +
  xlim(0.5, 2.5) +
  scale_fill_manual(values = blood_palette) +
  coord_polar("y") +
  theme_void()

## ring plot
type <- c("O+", "A+", "B+", "AB+", "O-", "A-", "B-", "AB-")
proportion <- c(33.2, 41.6, 6.8, 3.4, 5.8, 7.4, 1.2, 0.6)
df <- data.frame(type, proportion)
ggplot(df, aes(x = 2, y = proportion, fill = type)) +
  geom_bar(stat = "identity") +
  xlim(0.5, 2.5) +
  scale_fill_viridis_d() +
  coord_polar("y") +
  theme_void()

## multiple ring plot
# The all-zero "blank" country adds a third, empty x position.
type <- rep(c("O+", "A+", "B+", "AB+", "O-", "A-", "B-", "AB-"), 3)
country <- c(rep("Norway", 8), rep("UK", 8), rep("blank", 8))
Norway <- c(33.2, 41.6, 6.8, 3.4, 5.8, 7.4, 1.2, 0.6)
UK <- c(37.0, 31.0, 8.0, 3.0, 11.0, 7.0, 2.0, 1.0)
blank <- c(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
percentage <- c(Norway, UK, blank)
df <- data.frame(type, country, percentage)
ggplot(df, aes(x = country, y = percentage, fill = type)) +
  geom_bar(stat = "identity", width = .85) +
  scale_fill_manual(values = blood_palette) +
  coord_polar("y") +
  theme_void()

## multiple ring plot
type <- rep(c("O+", "A+", "B+", "AB+", "O-", "A-", "B-", "AB-"), 3)
country <- c(rep("Norway", 8), rep("UK", 8), rep("blank", 8))
Norway <- c(33.2, 41.6, 6.8, 3.4, 5.8, 7.4, 1.2, 0.6)
UK <- c(37.0, 31.0, 8.0, 3.0, 11.0, 7.0, 2.0, 1.0)
blank <- c(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
percentage <- c(Norway, UK, blank)
df <- data.frame(type, country, percentage)
ggplot(df, aes(x = country, y = percentage, fill = type)) +
  geom_bar(stat = "identity", width = .85) +
  scale_fill_viridis_d() +
  coord_polar("y") +
  theme_void()

## pie and ring plot
type <- rep(c("O+", "A+", "B+", "AB+", "O-", "A-", "B-", "AB-"), 2)
country <- c(rep("Norway", 8), rep("UK", 8))
Norway <- c(33.2, 41.6, 6.8, 3.4, 5.8, 7.4, 1.2, 0.6)
UK <- c(37.0, 31.0, 8.0, 3.0, 11.0, 7.0, 2.0, 1.0)
percentage <- c(Norway, UK)
df <- data.frame(type, country, percentage)
ggplot(df, aes(x = country, y = percentage, fill = type)) +
  geom_bar(stat = "identity", width = .85) +
  scale_fill_manual(values = blood_palette) +
  coord_polar("y") +
  theme_void()

## pie and ring plot
type <- rep(c("O+", "A+", "B+", "AB+", "O-", "A-", "B-", "AB-"), 2)
country <- c(rep("Norway", 8), rep("UK", 8))
Norway <- c(33.2, 41.6, 6.8, 3.4, 5.8, 7.4, 1.2, 0.6)
UK <- c(37.0, 31.0, 8.0, 3.0, 11.0, 7.0, 2.0, 1.0)
percentage <- c(Norway, UK)
df <- data.frame(type, country, percentage)
ggplot(df, aes(x = country, y = percentage, fill = type)) +
  geom_bar(stat = "identity", width = .85) +
  scale_fill_viridis_d() +
  coord_polar("y") +
  theme_void()

# static, subcomponents
## pie and ring plot
# Inner bar (x = 1) is filled by blood group, outer bar (x = 2) by subtype.
# NOTE(review): this example appears twice with identical code, unlike the
# other pairs where the second copy switches to a viridis palette -- confirm
# whether the duplicate is intended.
country <- c(rep("Norway", 8))
type <- rep(c("O", "A", "B", "AB"), 2)
subtype <- c("O+", "A+", "B+", "AB+", "O-", "A-", "B-", "AB-")
percentage <- c(33.2, 41.6, 6.8, 3.4, 5.8, 7.4, 1.2, 0.6)
df <- data.frame(country, type, subtype, percentage)
ggplot() +
  geom_bar(
    data = df, aes(x = 1, y = percentage, fill = type), stat = "identity"
  ) +
  geom_bar(
    data = df, aes(x = 2, y = percentage, fill = subtype), stat = "sum"
  ) +
  geom_hline(yintercept = c(0, 39, 47, 51), colour = "#451849", size = 1.5) +
  scale_fill_manual(values = c(
    "#2874A6", "#3498DB", "#85C1E9", "#148F77", "#1ABC9C", "#76D7C4",
    "#B7950B", "#F1C40F", "#F7DC6F", "#922B21", "#C0392B", "#D98880"
  )) +
  xlim(0, 2.5) +
  coord_polar("y") +
  theme_void()

## pie and ring plot
country <- c(rep("Norway", 8))
type <- rep(c("O", "A", "B", "AB"), 2)
subtype <- c("O+", "A+", "B+", "AB+", "O-", "A-", "B-", "AB-")
percentage <- c(33.2, 41.6, 6.8, 3.4, 5.8, 7.4, 1.2, 0.6)
df <- data.frame(country, type, subtype, percentage)
ggplot() +
  geom_bar(
    data = df, aes(x = 1, y = percentage, fill = type), stat = "identity"
  ) +
  geom_bar(
    data = df, aes(x = 2, y = percentage, fill = subtype), stat = "sum"
  ) +
  geom_hline(yintercept = c(0, 39, 47, 51), colour = "#451849", size = 1.5) +
  scale_fill_manual(values = c(
    "#2874A6", "#3498DB", "#85C1E9", "#148F77", "#1ABC9C", "#76D7C4",
    "#B7950B", "#F1C40F", "#F7DC6F", "#922B21", "#C0392B", "#D98880"
  )) +
  xlim(0, 2.5) +
  coord_polar("y") +
  theme_void()

## bar plot with subcomponents
# Left bar: all blood groups. Right bar: the "B" group only, rescaled by
# 100 / 8 so its subtypes fill 0-100. The segments outline the B slice
# (39 to 47) and connect it to the right-hand bar.
# The former theme(...) call was dropped: theme_void() followed it and
# replaced the whole theme, so it had no effect.
country <- c(rep("Norway", 8))
type <- rep(c("O", "A", "B", "AB"), 2)
subtype <- c("O+", "A+", "B+", "AB+", "O-", "A-", "B-", "AB-")
percentage <- c(33.2, 41.6, 6.8, 3.4, 5.8, 7.4, 1.2, 0.6)
df <- data.frame(country, type, subtype, percentage)
ggplot() +
  geom_bar(
    data = df, aes(x = 1, y = percentage, fill = type),
    width = .5, stat = "identity"
  ) +
  geom_bar(
    data = subset(df, type == "B"),
    aes(x = 2, y = (percentage * 100 / 8), fill = subtype),
    width = .5, stat = "sum"
  ) +
  geom_segment(
    data = df, aes(x = 0.75, y = 39, xend = 1.25, yend = 39),
    color = "darkblue", size = 1
  ) +
  geom_segment(
    data = df, aes(x = 0.75, y = 47, xend = 1.25, yend = 47),
    color = "darkblue", size = 1
  ) +
  geom_segment(
    data = df, aes(x = 1.25, y = 39, xend = 1.75, yend = 0),
    color = "darkblue", size = 1
  ) +
  geom_segment(
    data = df, aes(x = 1.25, y = 47, xend = 1.75, yend = 100),
    color = "darkblue", size = 1
  ) +
  geom_segment(
    data = df, aes(x = 1.75, y = 0, xend = 2.25, yend = 0),
    color = "darkblue", size = 1
  ) +
  geom_segment(
    data = df, aes(x = 1.75, y = 100, xend = 2.25, yend = 100),
    color = "darkblue", size = 1
  ) +
  scale_fill_manual(values = c(
    "#3498DB", "#1ABC9C", "#F1C40F", "#F7DC6F", "#C0392B", "#D98880"
  )) +
  xlim(0.5, 2.5) +
  theme_void()

# changing over time, few periods, relative and absolute
# stacked bars
year <- rep(c("1988", "1998", "2008", "2018"), 2)
gender <- c(rep("male", 4), rep("female", 4))
absolute <- c(
  2076155, 2185106, 2359690, 2668371,
  2122134, 2232493, 2377481, 2627248
)
percentage <- c(49.45, 49.46, 49.81, 50.39, 50.55, 50.54, 50.19, 49.61)
df <- data.frame(year, gender, absolute, percentage)
ggplot(df, aes(x = year, y = absolute)) +
  geom_bar(aes(fill = gender), stat = "identity") +
  blank_titles
# changing over time, few periods, relative only
# stacked bars 100%
# Ten age bands for each of four years; bars are stacked on the
# `percentage` column and filled by age band.
year <- rep(c("1988", "1998", "2008", "2018"), times = 10)
age <- rep(
  c(
    "0-9 years", "10-19 years", "20-29 years", "30-39 years",
    "40-49 years", "50-59 years", "60-69 years", "70-79 years",
    "80-89 years", "90 years or more"
  ),
  each = 4
)
population <- c(
  522063.5, 607944.5, 595080.5, 619083.5,
  610809.5, 535235.5, 632107.5, 639091.5,
  652045.0, 629437.5, 589126.0, 713182.0,
  621213.0, 664975.0, 682810.0, 712876.5,
  529530.0, 616922.5, 681520.5, 731853.5,
  383535.5, 511848.5, 605999.5, 686234.5,
  422845.5, 349618.5, 475742.0, 574534.5,
  316744.0, 329065.0, 286713.0, 410684.0,
  132696.5, 162373.5, 185147.0, 179545.5,
  18005.0, 24043.5, 33965.5, 44830.0
)
percentage <- c(
  12.40, 13.72, 12.48, 11.65,
  14.51, 12.08, 13.26, 12.03,
  15.49, 14.20, 12.36, 13.43,
  14.76, 15.01, 14.32, 13.42,
  12.58, 13.92, 14.29, 13.78,
  9.11, 11.55, 12.71, 12.92,
  10.05, 7.89, 9.98, 10.82,
  7.52, 7.43, 6.01, 7.73,
  3.15, 3.66, 3.88, 3.38,
  0.43, 0.54, 0.71, 0.84
)
df <- data.frame(year, age, population, percentage)
ggplot(df, aes(x = year, y = percentage)) +
  geom_col(aes(fill = age)) +
  scale_fill_viridis_d() +
  theme(
    plot.title = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )

# changing over time, many periods, relative and absolute
# stacked bars
# `year` holds 18 values and is recycled by data.frame() against the 36
# gender/students entries (first 18 male, last 18 female).
year <- as.numeric(2000:2017)
gender <- rep(c("male", "female"), each = 18)
students <- c(
  74914, 76958, 83328, 84097, 85144, 84967,
  83805, 81474, 83245, 87623, 90550, 94376,
  98449, 103612, 103485, 107690, 110570, 113038,
  111088, 115939, 125365, 125673, 125857, 126297,
  127424, 126789, 130938, 135297, 137197, 141464,
  147123, 149705, 152103, 158738, 162657, 164599
)
df <- data.frame(year, gender, students)
ggplot(df, aes(x = year, y = students)) +
  geom_col(aes(fill = gender)) +
  theme(
    plot.title = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )

# area
# Stacked area chart of student counts by gender, 2000-2017.
# `year` holds 18 values and is recycled by data.frame() against the 36
# gender/students entries (first 18 male, last 18 female).
year <- c(2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
          2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017)
gender <- rep(c("male", "female"), each = 18)
students <- c(
  74914, 76958, 83328, 84097, 85144, 84967,
  83805, 81474, 83245, 87623, 90550, 94376,
  98449, 103612, 103485, 107690, 110570, 113038,
  111088, 115939, 125365, 125673, 125857, 126297,
  127424, 126789, 130938, 135297, 137197, 141464,
  147123, 149705, 152103, 158738, 162657, 164599
)
df <- data.frame(year, gender, students)
ggplot(df, aes(x = year, y = students)) +
  geom_area(aes(fill = gender), stat = "identity") +
  theme(
    plot.title = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )

# line
# One line per gender over the same data, y axis fixed to 0-175000.
year <- c(2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
          2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017)
gender <- rep(c("male", "female"), each = 18)
students <- c(
  74914, 76958, 83328, 84097, 85144, 84967,
  83805, 81474, 83245, 87623, 90550, 94376,
  98449, 103612, 103485, 107690, 110570, 113038,
  111088, 115939, 125365, 125673, 125857, 126297,
  127424, 126789, 130938, 135297, 137197, 141464,
  147123, 149705, 152103, 158738, 162657, 164599
)
df <- data.frame(year, gender, students)
# The `+` after ylim() is required: without it theme() is a separate
# statement and the title/axis-title blanking never reaches the plot.
ggplot(df, aes(x = year, y = students)) +
  geom_line(aes(color = gender), size = 2) +
  ylim(0, 175000) +
  theme(
    plot.title = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )

# changing over time, many periods, relative only
# area 100%
# Same students data with a `percentage` column; the area is stacked on
# the percentage values rather than the absolute counts.
year <- c(2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
          2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017)
gender <- rep(c("male", "female"), each = 18)
students <- c(
  74914, 76958, 83328, 84097, 85144, 84967,
  83805, 81474, 83245, 87623, 90550, 94376,
  98449, 103612, 103485, 107690, 110570, 113038,
  111088, 115939, 125365, 125673, 125857, 126297,
  127424, 126789, 130938, 135297, 137197, 141464,
  147123, 149705, 152103, 158738, 162657, 164599
)
percentage <- c(
  40.28, 39.90, 39.93, 40.09, 40.35, 40.22,
  39.67, 39.12, 38.87, 39.31, 39.76, 40.02,
  40.09, 40.90, 40.49, 40.42, 40.47, 40.71,
  59.72, 60.10, 60.07, 59.91, 59.65, 59.78,
  60.33, 60.88, 61.13, 60.69, 60.24, 59.98,
  59.91, 59.10, 59.51, 59.58, 59.53, 59.29
)
df <- data.frame(year, gender, students, percentage)
ggplot(df, aes(x = year, y = percentage)) +
  geom_area(aes(fill = gender), stat = "identity") +
  theme(
    plot.title = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )

##### NOT WORKING YET####
# area NEEDS continuous
# The year labels are converted with as.numeric() so the x axis is
# continuous for geom_area().
year <- as.numeric(rep(c("1988", "1998", "2008", "2018"), 2))
gender <- c(rep("male", 4), rep("female", 4))
absolute <- c(2076155, 2185106, 2359690, 2668371,
              2122134, 2232493, 2377481, 2627248)
percentage <- c(49.45, 49.46, 49.81, 50.39, 50.55, 50.54, 50.19, 49.61)
df <- data.frame(year, gender, absolute, percentage)
ggplot(df, aes(x = year, y = percentage, fill = gender)) +
  geom_area() +
  theme(
    plot.title = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )

# changing over time, few periods, relative
# NOTE(review): `year` below has 12 elements (4 labels x 3) while
# `citizenship`, `absolute` and `percentage` each have 16. The code that
# consumes these vectors follows this section; confirm whether the repeat
# count should be 4.
year <- rep(c("1988", "1998", "2008", "2018"), 3)
citizenship <- c(rep("Sweden", 4), rep("Denmark", 4),
                 rep("UK", 4), rep("total", 4))
absolute <- c(
  12037, 20629, 29886, 43963,
  17562, 18438, 20461, 22806,
  12770, 10798, 12024, 16189,
  4198289, 4417599, 4737171, 5295619
)
percentage <- c(
  0.29, 0.47, 0.63, 0.83,
  0.42, 0.42, 0.43, 0.43,
  0.30, 0.24, 0.25, 0.31,
  100, 100, 100, 100
)