# Week 4 of R-DAVIS ----
# Review of where last week ended: factors.

# Factors as data ----
sex <- as.factor(c("male", "female", "male", "male"))
sex
class(sex)
typeof(sex)

# Reordering a factor: list the levels in the order you want them.
sex <- factor(sex, levels = c("male", "female"))
sex

# Converting factors to characters ----
as.character(sex)

year_fct <- as.factor(c(1990, 1983, 1977, 1998, 1990))
# Pitfall: as.numeric() on a factor returns the internal level codes,
# not the years that the labels spell out.
as.numeric(year_fct)
# Going through character first recovers the original numbers.
year_fct <- as.numeric(as.character(year_fct))

# Renaming factors and levels within factors ----
levels(sex)
levels(sex)[1] <- "MALE"
levels(sex)

# In-class challenge: rename both levels in one assignment.
levels(sex) <- c("M", "F")
levels(sex)

# Reordering the levels
sex <- factor(sex, levels = c("F", "M"))
levels(sex)

# Starting with data ----
surveys <- read.csv("data/portal_data_joined.csv")

class(surveys)     # this is a data.frame
nrow(surveys)      # number of rows
head(surveys)      # the first 6 rows
colnames(surveys)
summary(surveys)

# Challenge: inspect the object with str(), convert species_id to
# character, and inspect it again to see the change.
str(surveys)
surveys$species_id <- as.character(surveys$species_id)
str(surveys)

# Pulling out the unique identifiers in a column.
# The dollar sign pulls a single column out of a data frame.
unique(surveys$species_id)
unique(surveys$species)

# Indexing and subsetting data frames ----
# Brackets take two dimensions for a data frame: [row, column].
surveys[1, 1]

# First element of the sixth column
surveys[1, 6]

# A blank index means "everything" along that dimension.
surveys[, 3]

# First three elements of the 7th column
surveys[1:3, 7]

head(surveys)      # all columns, rows 1-6
surveys[6, ]       # just the 6th row
surveys[1:6, ]

# Negative indices exclude: here, everything except the first column.
surveys[, -1]

# Same as head() -- just the first 6 rows -- when the data has 34786 rows.
surveys[-(7:34786), ]

# Columns can also be called by name.
surveys[1:6, "species_id"]

# Challenge: subsetting a data frame ----
surveys_200 <- surveys[200, ]

nrow(surveys)
surveys[34786, ]
tail(surveys)

# Store the row count so the last row does not need a hard-coded number.
n_rows <- nrow(surveys)
surveys[n_rows, ]

# Getting the exact middle row of the data frame
surveys[n_rows / 2, ]

# Not the middle row: the mean of a single number is that number, so this
# indexes the last row again.
surveys[mean(n_rows), ]

# NOTE: there is no comma inside the brackets, so the single index is
# applied to columns. This drops columns 7 onward (keeping all rows)
# rather than dropping rows 7 onward.
surveys_head <- surveys[-(7:n_rows)]
# With the comma in place the negative index applies to rows: drop rows 7
# through n_rows and keep every column.
surveys_head2 <- surveys[-(7:n_rows), ]

# Tidyverse ----
# Installing a package (only needed once):
#   install.packages("tidyverse")
# Loading a package (needed in every session):
library(tidyverse)

# Read the same file again, this time with read_csv().
surveys <- read_csv("data/portal_data_joined.csv")
surveys
class(surveys)

# dplyr: select() and filter() ----
# select() is for columns, filter() is for rows.
surveys_new <- select(surveys, plot_id, species_id, weight)
surveys_plot3 <- filter(surveys, plot_id == 3)

# Pipes ----
# Goal: all weights greater than 5, and only the columns species_id, sex,
# and weight.

# Without a pipe: one intermediate object per step.
surveys2 <- filter(surveys, weight > 5)
surveys_sml <- select(surveys2, species_id, sex, weight)

# With the pipe, written %>%: each step feeds its result to the next.
surveys_sml2 <- surveys %>%
  filter(weight > 5) %>%
  select(species_id, sex, weight)