### Davis R Users Group: Teaching R Live Code
### https://tinyurl.com/teachingR     # link to dropbox version
### https://tinyurl.com/teachingRnew  # link to raw script

### FYI: This is a frequently updated R script used for "live-coding" while
### teaching. Contents may be removed/altered/changed and saved to the
### Davis R Users Group website: https://d-rug.github.io/

################################################################################
## Manipulating, analyzing, and exporting data with tidyverse:
## group_by(), summarize(), arrange(), and count()
################################################################################

# Getting started ---------------------------------------------------------

# First let's download a data set called "portal_data_joined.csv"
# We'll put it in our data folder inside of our project directory.
# The data directory is created below if it does not exist yet (you can
# also create one in the files pane). Alternatively, you can download the
# data by hand:
# https://tinyurl.com/y36xgftg

# download.file() fails when the destination directory is missing, so make
# sure it is there first (no warning if it already exists).
dir.create("data", showWarnings = FALSE)

download.file(url = "https://tinyurl.com/y36xgftg",
              destfile = "data/portal_data_joined.csv")

# Only install tidyverse when it is not already available, so re-running
# the script does not reinstall the package every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)

surveys <- read_csv("data/portal_data_joined.csv")

str(surveys)

# View() opens a data viewer, which needs an interactive session.
if (interactive()) {
  View(surveys)
}

# Review of dplyr commands ------------------------------------

## Select (columns) and Filter (rows)
select(surveys, plot_id, species_id, weight)
select(surveys, -record_id, -species_id)

surveys_1996 <- filter(surveys, year == 1996)

## Pipes
surveys %>%
  filter(weight < 5) %>%
  select(species_id, sex, weight)

## Mutate
# The piped and the non-piped form give the same result.
surveys %>%
  mutate(weight_kg = weight / 1000)

mutate(surveys, weight_kg = weight / 1000)

# split-apply-combine -----------------------------------------------------

surveys %>%
  group_by(sex) %>%
  summarize(mean_weight = mean(weight, na.rm = TRUE))

# group_by() on its own only marks the groups; the rows are unchanged
surveys %>%
  group_by(sex)

surveys %>%
  group_by(sex, species_id) %>%
  summarize(mean_weight = mean(weight, na.rm = TRUE))

# Dropping the missing weights first makes na.rm unnecessary
surveys %>%
  filter(!is.na(weight)) %>%
  group_by(sex, species_id) %>%
  summarize(mean_weight = mean(weight))

# Several summaries at once
surveys %>%
  filter(!is.na(weight)) %>%
  group_by(sex, species_id) %>%
  summarize(mean_weight = mean(weight),
            min_weight = min(weight))

# Sort the summary: ascending by min_weight, ties broken by mean_weight
surveys %>%
  filter(!is.na(weight)) %>%
  group_by(sex, species_id) %>%
  summarize(mean_weight = mean(weight),
            min_weight = min(weight)) %>%
  arrange(min_weight, mean_weight)

# Sort the summary: descending by mean_weight
surveys %>%
  filter(!is.na(weight)) %>%
  group_by(sex, species_id) %>%
  summarize(mean_weight = mean(weight),
            min_weight = min(weight)) %>%
  arrange(desc(mean_weight))

# count() with no columns gives the total number of rows
surveys %>%
  count()

# The two commands below are equivalent
# (apart from the name of the count column: n vs. count)
surveys %>%
  count(sex)

surveys %>%
  group_by(sex) %>%
  summarize(count = n())

surveys %>%
  count(sex, sort = TRUE)

surveys %>%
  count(sex, species)

surveys %>%
  count(sex, species) %>%
  arrange(species, desc(n))

# give EVERYTHING! all combos even if not in data
surveys %>%
  group_by(sex, species, .drop = FALSE) %>%
  tally() %>%
  ungroup() %>%
  complete(sex, species, fill = list(n = 0)) %>%
  arrange(species, desc(n))

# get only things that are in the data
surveys %>%
  group_by(sex, species, .drop = FALSE) %>%
  tally() %>%
  ungroup() %>%
  complete(nesting(sex, species), fill = list(n = 0)) %>%
  arrange(species, desc(n))

## want to replace NAs?
## look at the replace argument (e.g. of tidyr::replace_na())

## want M, bilineata, which is 0, to appear?
## use the group_by() function, and the argument .drop = FALSE
## (note: per the dplyr documentation, .drop only affects grouping columns
## that are factors; for character columns use complete() as shown above)

## Challenge
##
## 1a. How many animals were caught in each plot_type surveyed?
##
## 1b. Use group_by() and summarize() to find the mean, min, and max hindfoot
##     length for each species (using species_id). Also add the number of
##     observations (hint: see ?n).
##
## 1c. What was the heaviest animal measured in each year? Return the columns
##     year, genus, species_id, and weight.