### Davis R Users Group: Teaching R Live Code
### https://tinyurl.com/teachingR # link to dropbox version
### https://tinyurl.com/teachingRnew # link to raw script

### FYI: This is a frequently updated R script used for "live-coding" while
### teaching. Contents may be removed/altered/changed and saved to the
### Davis R Users Group website: https://d-rug.github.io/

################################################################################
## Manipulating, analyzing, and exporting data with tidyverse
################################################################################

# Getting started ---------------------------------------------------------

# First let's download a data set called "portal_data_joined.csv"
# We'll put it in our data folder inside of our project directory.
# If you don't already have a data directory, you can create one in
# the files pane. Alternatively, you can download the data by hand:
# https://tinyurl.com/y36xgftg

# download.file() cannot write into a folder that does not exist, so make
# sure "data" is there first (this does nothing if it already exists).
dir.create("data", showWarnings = FALSE)

# Only download when we don't already have a local copy of the file.
if (!file.exists("data/portal_data_joined.csv")) {
  download.file(
    url = "https://tinyurl.com/y36xgftg",
    destfile = "data/portal_data_joined.csv"
  )
}

# Install tidyverse only when it is missing, so that re-running the script
# does not reinstall the package every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)

surveys <- read_csv("data/portal_data_joined.csv")

str(surveys)

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(surveys)
}

# selecting columns and filtering rows ------------------------------------

# keep only the named columns
select(surveys, plot_id, species_id, weight)

# a minus sign drops the named columns and keeps everything else
select(surveys, -record_id, -species_id)

# check the column names to make sure I didn't have a typo
colnames(surveys)

# keep only the rows where the condition is TRUE
filter(surveys, year == 1995)

# Pipes -------------------------------------------------------------------

# option 1: intermediate objects
surveys2 <- filter(surveys, weight < 5)
surveys_sml <- select(surveys2, species_id, sex, weight)

# option 2: nested function calls (read from the inside out)
surveys_sml <- select(filter(surveys, weight < 5), species_id, sex, weight)

# option 3: pipes (read from top to bottom)
surveys %>%
  filter(weight < 5) %>%
  select(species_id, sex, weight)

# assign the result of the pipeline to a new object
surveys_sml <- surveys %>%
  filter(weight < 5) %>%
  select(species_id, sex, weight)

head(surveys_sml)

## Challenge 1:
## Using pipes, subset the surveys data to include animals collected
## before 1995 and retain only the columns year, sex, and weight

surveys %>%
  filter(year < 1995) %>%
  select(year, sex, weight) %>%
  arrange(desc(year))

# Mutate ------------------------------------------------------------------

surveys %>%
  mutate(weight_kg = weight / 1000)

# a column created inside mutate() can be used right away in the same call
surveys %>%
  mutate(
    weight_kg = weight / 1000,
    weight_lb = weight_kg * 2.2
  ) %>%
  select(weight, weight_kg, weight_lb)

# drop the rows with a missing weight before computing the new column ...
surveys %>%
  filter(!is.na(weight)) %>%
  mutate(weight_kg = weight / 1000)

# ... or after; both orders keep the same rows
surveys %>%
  mutate(weight_kg = weight / 1000) %>%
  filter(!is.na(weight))

## Challenge 2:
## Create a new data frame from the surveys data that meets the following
## criteria: contains only the species_id column and a new column called
## hindfoot_half containing values that are half the hindfoot_length values.
## In this hindfoot_half column, there are no NAs and all values are less
## than 30.
## Hint: think about how the commands should be ordered to produce this
## data frame!

# solution with two separate filter() steps
surveys_hindfoot_half <- surveys %>%
  mutate(hindfoot_half = hindfoot_length / 2) %>%
  filter(!is.na(hindfoot_half)) %>%
  filter(hindfoot_half < 30) %>%
  select(species_id, hindfoot_half)

# same result with both conditions in a single filter() call
surveys_hindfoot_half <- surveys %>%
  mutate(hindfoot_half = hindfoot_length / 2) %>%
  filter(!is.na(hindfoot_half), hindfoot_half < 30) %>%
  select(species_id, hindfoot_half)

# The comma in filter() signifies "and": a row is kept only when every
# condition is TRUE. If you're filtering on one single column and you want
# rows that match either of two values, the comma won't work, because a
# value can't equal both at once. In that case, you need to use the
# "or" (|) operator.

# how to check if dplyr did what we wanted it to do

# make a histogram
hist(surveys_hindfoot_half$hindfoot_half)

# use the summary function
summary(surveys_hindfoot_half)

# fancier:
# skimr package
# (installed only when missing, same as tidyverse above)
if (!requireNamespace("skimr", quietly = TRUE)) {
  install.packages("skimr")
}
library(skimr)

skim(surveys_hindfoot_half)