A brief summary of phenology plot ideas

Import Data & Set Libraries

library(ggplot2)
library(plyr) # always load plyr before dplyr, Mr. Wickham uses overlapping function names for some reason
library(dplyr)
library(wesanderson)

# sources: 
# get dplyr props right: http://stackoverflow.com/questions/18057081/ddply-summarise-proportional-count
# ggplot position arg: http://sape.inf.usi.ch/quick-reference/ggplot2/position
# stacked bar: http://stackoverflow.com/questions/3619067/stacked-bar-chart-in-r-ggplot2-with-y-axis-and-bars-as-percentage-of-counts
# stacked area: http://stackoverflow.com/questions/4651428/making-a-stacked-area-plot-using-ggplot2
# stacked area: http://stackoverflow.com/questions/12323060/geom-area-plot-with-areas-and-outlines-ggplot

# read in data
pheData <- read.csv("C:/Users/cflagg/Documents/GitHub/codingSupportGroup/dataSkillsOSIS/data/NEON/phe_2014/NEON.D10.STER.DP1.10055.001.phe_statusintensity.csv", header = T)

# scramble phenophaseStatus for now, since it's all homogeneous
# DON'T RUN ON FULL DATASET
pheData$phenophaseStatus <- sample(pheData$phenophaseStatus)

Calculation Check

  • Make sure the following dplyr() transforms are correct
# these are the correct totals, but not in an ideal format
xtabs(~dayOfYear+phenophaseName+phenophaseStatus, pheData)
## , , phenophaseStatus = missed
## 
##          phenophaseName
## dayOfYear Initial growth Leaves Open flowers
##       90               3      2            2
##       98               1      1            0
##       111              4      2            3
##       119              3      2            1
##       133              0      2            1
##       147              1      1            0
##       161              0      2            1
##       176              2      5            2
##       181              3      0            1
##       184              2      1            3
##       188              2      3            2
##       191              1      0            1
## 
## , , phenophaseStatus = no
## 
##          phenophaseName
## dayOfYear Initial growth Leaves Open flowers
##       90              14     14           18
##       98              17     16           15
##       111             17     18           16
##       119             18     21           19
##       133             22     20           13
##       147             23     20           23
##       161             21     19           21
##       176             18     16           21
##       181             15     18           18
##       184             14     14           22
##       188             22     19           22
##       191             18     18           20
## 
## , , phenophaseStatus = yes
## 
##          phenophaseName
## dayOfYear Initial growth Leaves Open flowers
##       90              13     14           10
##       98              12     13           15
##       111              9     10           11
##       119              9      7           10
##       133              8      8           16
##       147              6      9            7
##       161              9      9            8
##       176             10      9            7
##       181             12     12           11
##       184             14     15            5
##       188              6      8            6
##       191             11     12            9

Figures

  • Looking at phenophaseName == “Leaves” only

Figure 1 (Caren’s barplot)

  • x-axis = sites X dayOfYear
  • y-axis = proportion of same taxonID/individualID within that phenophase (number in phenophase/total in taxonID)
  • groups = phenophaseStatus (% of group in phenophase X)
# dplyr method
# how do we pass the total individualID count without hard coding it? Coun't it before hand with a ddply
# don't want to hardcode because # of individuals could be different because of missing tags or field error
# obtain count of individualID per growthForm per dayOfYear -- this = totalID_n
pheData2 <- ddply(pheData, .(dayOfYear, growthForm), mutate, totalID_n = length(unique(individualID))) # first calculate total individuals to get
pheData2 %>% 
  dplyr::group_by(dayOfYear, phenophaseName, phenophaseStatus, growthForm,totalID_n) %>% 
  dplyr::summarize(n = length(phenophaseStatus)) %>% 
  dplyr::mutate(prop = (n/totalID_n)*100) %>% 
  # limit to beginning and end days -- full plot looks weird
  dplyr::filter(phenophaseName == "Leaves", dayOfYear %in% c(90,191)) %>% 
  ggplot(aes(x = dayOfYear, y = prop)) + geom_bar(aes(fill=phenophaseStatus), stat="identity", position = "dodge") + scale_fill_manual(values = wes_palette("Darjeeling"))

Figure 2 (Lee’s box & whisker)

  • x-axis = phenophase (1,2,3) in single taxonID
  • y-axis = days in phenophase (number of individualID/total individualID)
  • group = siteID
# this is why the boxes are so even
xtabs(~phenophaseStatus+dayOfYear+phenophaseName, pheData2)
## , , phenophaseName = Initial growth
## 
##                 dayOfYear
## phenophaseStatus 90 98 111 119 133 147 161 176 181 184 188 191
##           missed  3  1   4   3   0   1   0   2   3   2   2   1
##           no     14 17  17  18  22  23  21  18  15  14  22  18
##           yes    13 12   9   9   8   6   9  10  12  14   6  11
## 
## , , phenophaseName = Leaves
## 
##                 dayOfYear
## phenophaseStatus 90 98 111 119 133 147 161 176 181 184 188 191
##           missed  2  1   2   2   2   1   2   5   0   1   3   0
##           no     14 16  18  21  20  20  19  16  18  14  19  18
##           yes    14 13  10   7   8   9   9   9  12  15   8  12
## 
## , , phenophaseName = Open flowers
## 
##                 dayOfYear
## phenophaseStatus 90 98 111 119 133 147 161 176 181 184 188 191
##           missed  2  0   3   1   1   0   1   2   1   3   2   1
##           no     18 15  16  19  13  23  21  21  18  22  22  20
##           yes    10 15  11  10  16   7   8   7  11   5   6   9
pheData2 %>% 
  dplyr::group_by(dayOfYear, phenophaseName, phenophaseStatus, growthForm,totalID_n) %>% 
  dplyr::filter() %>%
  ggplot(aes(x = phenophaseStatus, y = dayOfYear, colour = phenophaseName, fill = phenophaseName)) + geom_boxplot() + scale_fill_manual(values = wes_palette("Darjeeling"))

Figure 3 (Cody’s line-version)

  • x-axis = dayOfYear (full-series)
  • y-axis = proportion of individuals in phenophaseStatus
  • lines = phenophaseStatus (with area fill)
  • facet_wrap phenophaseName?
pheData2 %>% 
  group_by(dayOfYear, phenophaseName, phenophaseStatus, growthForm,totalID_n) %>% 
  summarize(n = length(phenophaseStatus)) %>% 
  mutate(prop = (n/totalID_n)*100) %>% 
  filter(phenophaseName == "Leaves") %>% 
  ggplot(aes(x = dayOfYear, y = prop, group = phenophaseStatus))  + geom_line(aes(colour=phenophaseStatus), size = 2)+ scale_colour_manual(values = wes_palette("Darjeeling")) 

# + facet_wrap(~phenophaseName)
  • Would look cool if the factors were re-ordered so alpha made the graph look better
pheData2 <- ddply(pheData, .(dayOfYear, growthForm), mutate, totalID_n = length(unique(individualID))) # first calculate total individuals to get denominator of proportion -- by day and growthForm (eventually)
pheData2 %>% 
  group_by(dayOfYear, phenophaseName, phenophaseStatus, growthForm,totalID_n) %>% 
  summarize(n = length(phenophaseStatus)) %>% 
  mutate(prop = (n/totalID_n)*100) %>% 
  filter(phenophaseName == "Leaves") %>% 
ggplot(aes(x = dayOfYear, y = prop)) + 
  geom_area(aes(group = phenophaseStatus, fill = phenophaseStatus), position = "dodge", alpha=0.5) + scale_fill_manual(values = wes_palette("Darjeeling"))