#Welcome to Week 4 of R-DAVIS 

#Let's review what we ended on last week: Factors 

# Factors as data ----

# Build a factor straight from a character vector.
sex <- factor(c("male", "female", "male", "male"))
sex
class(sex)   # "factor"
typeof(sex)  # "integer": a factor stores integer codes plus a levels attribute

# Reordering a factor ----

# Levels default to alphabetical order ("female", "male"); put "male" first.
sex <- factor(
  sex,
  levels = c("male", "female")
)
sex

# Converting factors to characters ----

as.character(sex)

year_fct <- factor(c(1990, 1983, 1977, 1998, 1990))

# as.numeric() on a factor returns the underlying level codes, not the years.
as.numeric(year_fct)

# Look up each element's level label as a number to recover the real years.
year_fct <- as.numeric(levels(year_fct))[year_fct]

# Renaming factors and levels within factors ----

levels(sex)

# Rename only the first level.
levels(sex)[1] <- "MALE"
levels(sex)

# In-class challenge: rename every level at once (matched by position).
levels(sex) <- c("M", "F")
levels(sex)

# Reordering the levels ----

# Reverse the current order c("M", "F") so the levels become c("F", "M").
sex <- factor(sex, levels = rev(levels(sex)))

levels(sex)

# Starting with data ----

# read.csv() returns a base data.frame.
surveys <- read.csv(file = "data/portal_data_joined.csv")

class(surveys)

nrow(surveys)            # number of rows
head(surveys, n = 6)     # the first 6 rows
colnames(surveys)        # column names
summary(surveys)         # per-column summary

# Challenge: inspect the structure with str()
str(surveys)

# Store species_id as character, then confirm the column type with str().
surveys$species_id <- as.character(x = surveys$species_id)

str(surveys)

# Pulling out the unique identifiers in a column

unique(surveys$species_id)
unique(surveys$species)

#The dollar sign is for pulling out single columns in a data frame 

# Indexing and subsetting data frames ----

# Brackets take two dimensions for a data frame: [row, column].

# First row, first column
surveys[1, 1]

# First row, sixth column
surveys[1, 6]

# A blank dimension means "everything": here, all rows of the 3rd column.
surveys[, 3]

# First three elements of the 7th column
surveys[1:3, 7]

head(surveys)

# Just the 6th row, all columns
surveys[6, ]

# All columns, rows 1-6
surveys[1:6, ]

# Exclude the first column
surveys[, -1]

# Same as head(): drop rows 7 through the last row, leaving the first 6.
# nrow() is used instead of a hard-coded row count so this keeps working if
# the data file changes. Assumes the data has at least 7 rows, because
# 7:nrow(surveys) would count downwards otherwise.
surveys[-(7:nrow(surveys)), ]

# Columns can also be selected by name.
surveys[1:6, "species_id"]

# Challenge: subsetting a data frame ----

# Row 200 only, all columns
surveys_200 <- surveys[200, ]

nrow(surveys)

# The last row: index with nrow() rather than typing the row count by hand,
# then compare against the final row printed by tail().
surveys[nrow(surveys), ]
tail(surveys)

# Store the row count once so it can be reused below.
n_rows <- nrow(surveys)

surveys[n_rows, ]

# The middle row of the data frame. ceiling() keeps the index a whole number
# (and picks the true middle) when the row count is odd.
surveys[ceiling(n_rows / 2), ]

# mean() of a single number is that number, so this returns the last row
# again rather than the middle row.
surveys[mean(n_rows), ]

# Keep only the first 6 rows by dropping rows 7 through n_rows.
# The comma matters: without it the negative index is applied to columns,
# not rows. Assumes n_rows is at least 7.
surveys_head <- surveys[-(7:n_rows), ]
surveys_head2 <- surveys[-(7:n_rows), ]

#TIDYVERSE HUZZAH

# How to install a package (only needs to be done once): install.packages("tidyverse")

# How to load a package (needs to be done in every new R session)
library(tidyverse)

# Re-read the data with readr's read_csv(); class() below shows what it returns.
surveys <- read_csv(file = "data/portal_data_joined.csv")

surveys
class(surveys)

# dplyr: select() and filter() ----

# select() picks columns; filter() picks rows.
surveys_new <- select(.data = surveys, plot_id, species_id, weight)

surveys_plot3 <- filter(.data = surveys, plot_id == 3)

# Pipes ----

# Goal: keep rows with weight greater than 5, and only the columns
# species_id, sex, and weight.

# Approach 1: save an intermediate object, then select from it.
surveys2 <- filter(.data = surveys, weight > 5)

surveys_sml <- select(.data = surveys2, species_id, sex, weight)

# Approach 2: chain the same two steps with the pipe, %>%, which passes the
# result on its left as the first argument of the call on its right.
surveys_sml2 <-
  surveys %>%
  filter(weight > 5) %>%
  select(species_id, sex, weight)