### Davis R Users Group: Teaching R Live Code
### https://tinyurl.com/teachingR     # link to dropbox version
### https://tinyurl.com/teachingRnew  # link to raw script
### FYI: This is a frequently updated R script used for "live-coding" while 
### teaching. Contents may be removed/altered/changed and saved to the 
### Davis R Users Group website: https://d-rug.github.io/ 

################################################################################
## Manipulating, analyzing, and exporting data with tidyverse:
## group_by(), summarize(), arrange(), and count()
################################################################################


# Getting started ---------------------------------------------------------

# First let's download a data set called "portal_data_joined.csv"
# We'll put it in our data folder inside of our project directory.
# The script creates the data directory if it does not exist yet (you can
# also create it in the files pane). Alternatively, you can download the
# data by hand:
# https://tinyurl.com/y36xgftg

# download.file() cannot write into a folder that does not exist, so make
# sure "data" is there first.
if (!dir.exists("data")) {
  dir.create("data")
}

download.file(url = "https://tinyurl.com/y36xgftg",
              destfile = "data/portal_data_joined.csv")

# Install tidyverse only when it is missing, so re-running the script does
# not reinstall the package every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)

# Read the csv into a tibble and inspect its structure.
surveys <- read_csv("data/portal_data_joined.csv")
str(surveys)

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(surveys)
}


# Review of dplyr commands ------------------------------------

## select() works on columns, filter() works on rows

# Keep three columns by name.
select(surveys, plot_id, species_id, weight)

# A minus sign drops the listed columns and keeps everything else.
select(surveys, -c(record_id, species_id))

# Keep only the rows where year is 1996 and store the result.
surveys_1996 <- filter(surveys, year == 1996)

## Pipes: the result of each step is passed on to the next one
surveys %>%
  filter(weight < 5) %>%
  select(species_id, sex, weight)

## Mutate: add a new column computed from an existing one

# Piped form.
surveys %>%
  mutate(weight_kg = weight / 1000)

# Same call without the pipe.
mutate(surveys, weight_kg = weight / 1000)


# split-apply-combine -----------------------------------------------------

# Mean weight for each sex. na.rm = TRUE removes missing weights before the
# mean is taken (TRUE is spelled out because T is an ordinary variable that
# can be reassigned).
surveys %>%
  group_by(sex) %>%
  summarize(mean_weight = mean(weight, na.rm = TRUE))

# group_by() on its own only marks the groups; nothing is summarized yet.
surveys %>%
  group_by(sex)

# Group by two columns: one mean per sex/species_id combination.
surveys %>%
  group_by(sex, species_id) %>%
  summarize(mean_weight = mean(weight, na.rm = TRUE))

# Alternative: drop rows with a missing weight first, so mean() no longer
# needs na.rm.
surveys %>%
  filter(!is.na(weight)) %>%
  group_by(sex, species_id) %>%
  summarize(mean_weight = mean(weight))

# Several summaries can be computed in a single summarize() call.
surveys %>%
  filter(!is.na(weight)) %>%
  group_by(sex, species_id) %>%
  summarize(mean_weight = mean(weight),
            min_weight = min(weight))

# Sort the summary by min_weight, breaking ties with mean_weight
# (ascending order).
surveys %>%
  filter(!is.na(weight)) %>%
  group_by(sex, species_id) %>%
  summarize(mean_weight = mean(weight),
            min_weight = min(weight)) %>%
  arrange(min_weight, mean_weight)

# desc() reverses the order: largest mean_weight first.
surveys %>%
  filter(!is.na(weight)) %>%
  group_by(sex, species_id) %>%
  summarize(mean_weight = mean(weight),
            min_weight = min(weight)) %>%
  arrange(desc(mean_weight))

# With no columns given, count() returns the total number of rows.
count(surveys)

# The two commands below are equivalent (apart from the name of the new
# column: count() calls it n, the summarize() version calls it count).
surveys %>%
  count(sex)

surveys %>%
  group_by(sex) %>%
  summarize(count = n())

# sort = TRUE puts the largest counts first.
surveys %>%
  count(sex, sort = TRUE)

# Count every combination of two columns.
surveys %>%
  count(sex, species)

# Same counts, ordered by species and then by descending count.
surveys %>%
  count(sex, species) %>%
  arrange(species, desc(n))

# give EVERYTHING! all combos even if not in data
# complete(sex, species) adds a row for every sex/species combination and
# fill = list(n = 0) sets the count of the added rows to 0 instead of NA.
# NOTE: .drop = FALSE only concerns groups formed by unused factor levels;
# if sex and species are character columns (the read_csv() default) it is
# complete() that adds the missing combinations here.
surveys %>%
  group_by(sex, species, .drop = FALSE) %>%
  tally() %>%
  ungroup() %>%
  complete(sex, species, fill = list(n = 0)) %>%
  arrange(species, desc(n))

# get only things that are in the data
# nesting(sex, species) restricts complete() to the combinations that
# already occur together in the table.
surveys %>%
  group_by(sex, species, .drop = FALSE) %>%
  tally() %>%
  ungroup() %>%
  complete(nesting(sex, species), fill = list(n = 0)) %>%
  arrange(species, desc(n))

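## want to replace NAs? 
##   look at the fill argument of complete() (or the replace argument of
##   replace_na())
## want M, bilineata, which is 0, to appear?
##   use the group_by() function, and the argument .drop = FALSE
##   (.drop only has an effect when the grouping columns are factors)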
## Challenge
## 
## 1a. How many animals were caught in each plot_type surveyed?
##   
## 1b. Use group_by() and summarize() to find the mean, min, and max hindfoot 
##     length for each species (using species_id). Also add the number of
##     observations (hint: see ?n).
##
## 1c. What was the heaviest animal measured in each year? Return the columns 
##     year, genus, species_id, and weight.