library(ggplot2)
library(plyr) # always load plyr before dplyr, Mr. Wickham uses overlapping function names for some reason
library(dplyr)
library(wesanderson)
# sources:
# get dplyr props right: http://stackoverflow.com/questions/18057081/ddply-summarise-proportional-count
# ggplot position arg: http://sape.inf.usi.ch/quick-reference/ggplot2/position
# stacked bar: http://stackoverflow.com/questions/3619067/stacked-bar-chart-in-r-ggplot2-with-y-axis-and-bars-as-percentage-of-counts
# stacked area: http://stackoverflow.com/questions/4651428/making-a-stacked-area-plot-using-ggplot2
# stacked area: http://stackoverflow.com/questions/12323060/geom-area-plot-with-areas-and-outlines-ggplot
# read in data
pheData <- read.csv("C:/Users/cflagg/Documents/GitHub/codingSupportGroup/dataSkillsOSIS/data/NEON/phe_2014/NEON.D10.STER.DP1.10055.001.phe_statusintensity.csv", header = T)
# scramble phenophaseStatus for now, since it's all homogeneous
# DON'T RUN ON FULL DATASET
pheData$phenophaseStatus <- sample(pheData$phenophaseStatus)
# these are the correct totals, but not in an ideal format
xtabs(~dayOfYear+phenophaseName+phenophaseStatus, pheData)
## , , phenophaseStatus = missed
##
## phenophaseName
## dayOfYear Initial growth Leaves Open flowers
## 90 3 2 2
## 98 1 1 0
## 111 4 2 3
## 119 3 2 1
## 133 0 2 1
## 147 1 1 0
## 161 0 2 1
## 176 2 5 2
## 181 3 0 1
## 184 2 1 3
## 188 2 3 2
## 191 1 0 1
##
## , , phenophaseStatus = no
##
## phenophaseName
## dayOfYear Initial growth Leaves Open flowers
## 90 14 14 18
## 98 17 16 15
## 111 17 18 16
## 119 18 21 19
## 133 22 20 13
## 147 23 20 23
## 161 21 19 21
## 176 18 16 21
## 181 15 18 18
## 184 14 14 22
## 188 22 19 22
## 191 18 18 20
##
## , , phenophaseStatus = yes
##
## phenophaseName
## dayOfYear Initial growth Leaves Open flowers
## 90 13 14 10
## 98 12 13 15
## 111 9 10 11
## 119 9 7 10
## 133 8 8 16
## 147 6 9 7
## 161 9 9 8
## 176 10 9 7
## 181 12 12 11
## 184 14 15 5
## 188 6 8 6
## 191 11 12 9
# dplyr method
# how do we pass the total individualID count without hard coding it? Coun't it before hand with a ddply
# don't want to hardcode because # of individuals could be different because of missing tags or field error
# obtain count of individualID per growthForm per dayOfYear -- this = totalID_n
pheData2 <- ddply(pheData, .(dayOfYear, growthForm), mutate, totalID_n = length(unique(individualID))) # first calculate total individuals to get
pheData2 %>%
dplyr::group_by(dayOfYear, phenophaseName, phenophaseStatus, growthForm,totalID_n) %>%
dplyr::summarize(n = length(phenophaseStatus)) %>%
dplyr::mutate(prop = (n/totalID_n)*100) %>%
# limit to beginning and end days -- full plot looks weird
dplyr::filter(phenophaseName == "Leaves", dayOfYear %in% c(90,191)) %>%
ggplot(aes(x = dayOfYear, y = prop)) + geom_bar(aes(fill=phenophaseStatus), stat="identity", position = "dodge") + scale_fill_manual(values = wes_palette("Darjeeling"))
# this is why the boxes are so even
xtabs(~phenophaseStatus+dayOfYear+phenophaseName, pheData2)
## , , phenophaseName = Initial growth
##
## dayOfYear
## phenophaseStatus 90 98 111 119 133 147 161 176 181 184 188 191
## missed 3 1 4 3 0 1 0 2 3 2 2 1
## no 14 17 17 18 22 23 21 18 15 14 22 18
## yes 13 12 9 9 8 6 9 10 12 14 6 11
##
## , , phenophaseName = Leaves
##
## dayOfYear
## phenophaseStatus 90 98 111 119 133 147 161 176 181 184 188 191
## missed 2 1 2 2 2 1 2 5 0 1 3 0
## no 14 16 18 21 20 20 19 16 18 14 19 18
## yes 14 13 10 7 8 9 9 9 12 15 8 12
##
## , , phenophaseName = Open flowers
##
## dayOfYear
## phenophaseStatus 90 98 111 119 133 147 161 176 181 184 188 191
## missed 2 0 3 1 1 0 1 2 1 3 2 1
## no 18 15 16 19 13 23 21 21 18 22 22 20
## yes 10 15 11 10 16 7 8 7 11 5 6 9
pheData2 %>%
dplyr::group_by(dayOfYear, phenophaseName, phenophaseStatus, growthForm,totalID_n) %>%
dplyr::filter() %>%
ggplot(aes(x = phenophaseStatus, y = dayOfYear, colour = phenophaseName, fill = phenophaseName)) + geom_boxplot() + scale_fill_manual(values = wes_palette("Darjeeling"))
pheData2 %>%
group_by(dayOfYear, phenophaseName, phenophaseStatus, growthForm,totalID_n) %>%
summarize(n = length(phenophaseStatus)) %>%
mutate(prop = (n/totalID_n)*100) %>%
filter(phenophaseName == "Leaves") %>%
ggplot(aes(x = dayOfYear, y = prop, group = phenophaseStatus)) + geom_line(aes(colour=phenophaseStatus), size = 2)+ scale_colour_manual(values = wes_palette("Darjeeling"))
# + facet_wrap(~phenophaseName)
pheData2 <- ddply(pheData, .(dayOfYear, growthForm), mutate, totalID_n = length(unique(individualID))) # first calculate total individuals to get denominator of proportion -- by day and growthForm (eventually)
pheData2 %>%
group_by(dayOfYear, phenophaseName, phenophaseStatus, growthForm,totalID_n) %>%
summarize(n = length(phenophaseStatus)) %>%
mutate(prop = (n/totalID_n)*100) %>%
filter(phenophaseName == "Leaves") %>%
ggplot(aes(x = dayOfYear, y = prop)) +
geom_area(aes(group = phenophaseStatus, fill = phenophaseStatus), position = "dodge", alpha=0.5) + scale_fill_manual(values = wes_palette("Darjeeling"))