All technological notes.
## Template: define a function called function_name that takes arg1, arg2,
## arg3 and any further arguments captured by `...`.
function_name <- function(arg1, arg2, arg3, ...) {
## do any code in here when called
## return() ends the call and hands return_object back to the caller;
## return_object is a placeholder for whatever the body computes
return(return_object)
}
## Add two numbers.
## x, y: the values to add.
## Returns x + y. A function returns the value of its last evaluated
## expression, so no explicit return() is needed here.
add_numbers <- function(x, y) {
  x + y
}
- if and else: testing a condition and acting on it
- for: execute a loop a fixed number of times
- while: execute a loop while a condition is true
- repeat: execute an infinite loop (must break out of it to stop)
- break: break the execution of a loop
- next: skip an iteration of a loop
if(condition){
## do any code here
}
## Template: if-else. The first branch runs when condition is TRUE, otherwise
## the second branch runs.
## `else` must follow the closing brace of the `if` branch on the same line:
## at top level R considers the `if` statement complete at the end of that
## line, so an `else` that starts a new line is a syntax error.
if (condition) {
  ## do any code here
} else {
  ## do any code here
}
## Sequential stacking of if-else statements.
## Conditions are tested top to bottom; the branch of the first TRUE condition
## runs, and the final `else` runs when none of them is TRUE.
## Every `else` stays on the same line as the preceding closing brace: at top
## level an `else` that starts a new line is a syntax error.
if (condition) {
  ## do any code here
} else if (condition) {
  ## do any code here
} else {
  ## do any code here
}
## Template: for loop. The body runs once per element of loop_vector, with
## loop_index set to the current element.
for(loop_index in loop_vector) {
## do any code in here
}
## Example: print the integers 1 through 5, one per iteration.
for (i in 1:5) {
  print(i)  ## Print the value of i
}
## Template: while loop. condition is tested before every iteration and the
## body repeats for as long as it is TRUE.
while(condition) {
## do any code in here
}
## Example: count from 1 to 5 with a while loop, printing each value.
a <- 1
while (a <= 5) {
  print(a)    ## Print the value of a
  a <- a + 1  ## Increment the value of a
}
## Example: repeat loops forever, so the body must break out by itself.
## Prints 1 through 4 and stops once a reaches 5.
a <- 1
repeat {
  ## Guard clause: if the value of a is 5, terminate the loop
  if (a == 5) {
    break
  }
  print(a)
  a <- a + 1
}
| Function | Output Data Type | Description |
|---|---|---|
| apply() | Vector, Array, or List | Applies a function over the margins of an array (e.g., rows or columns). Works with arrays, matrices, and data frames. |
| lapply() | List | Applies a function to each element of a list and returns a list. Useful for operations on lists where the output length may vary. |
| sapply() | Depends on Input | Simplifies the output of lapply() into a vector or matrix. Attempts to simplify the result into the most meaningful data structure. |
| tapply() | Varies | Applies a function over subsets of a vector, array, or data frame, grouped by a given factor or list of factors. Useful for applying a function by groups. |
| mapply() | Depends on Input | Applies a function to the first elements of each argument, the second elements, and so on. Useful for element-wise operations on multiple vectors or lists. |
## lapply(): call mean() on every element of a list; the result is a list
## with the same names as the input.
x <- list(
  a = 1:10,
  beta = exp(-3:3),
  logic = c(TRUE, FALSE, FALSE, TRUE)
)
result <- lapply(x, mean)  ## compute the mean of each list element
result
# $a
# [1] 5.5
# $beta
# [1] 4.535125
# $logic
# [1] 0.5
class(result)
# [1] "list"
## sapply(): same computation as lapply(), but the per-element results are
## simplified into a named vector when possible.
x <- list(
  a = 1:10,
  beta = exp(-3:3),
  logic = c(TRUE, FALSE, FALSE, TRUE)
)
result <- sapply(x, mean)  ## compute the mean of each list element
result
# a beta logic
# 5.500000 4.535125 0.500000
class(result)
# [1] "numeric"
# "numeric" because all the elements in the result vector are numeric.
## tapply(): apply a function to a vector split into groups by a factor.
## Draw 5 binomial values (32 trials each, success probability 0.4) and turn
## them into a factor. No seed is set, so the values shown below are the
## output of one particular run.
groups <- as.factor(rbinom(n = 5, size = 32, prob = 0.4))
groups
# [1] 9 11 13 17 13
# Levels: 9 11 13 17
## Counting the members of each group is almost the same as table(groups)
tapply(groups, groups, length)
# 9 11 13 17
# 1 1 2 1
table(groups)
# groups
# 9 11 13 17
# 1 1 2 1
## apply(): apply a function over the rows (second argument 1) or the columns
## (second argument 2) of a matrix.
x <- matrix(1:12, nrow = 4, ncol = 3)
# [,1] [,2] [,3]
# [1,] 1 5 9
# [2,] 2 6 10
# [3,] 3 7 11
# [4,] 4 8 12
apply(x, 1, sum) ## Take the sum of each row
# [1] 15 18 21 24
apply(x, 2, sum) ## Take the sum of each column
# [1] 10 26 42
## mapply(): apply a function element-wise across several vectors at once.
## Helper that adds two numbers.
add_numbers <- function(x, y) {
  x + y
}
# Two input vectors of equal length
vector1 <- c(1, 2, 3)
vector2 <- c(4, 5, 6)
# Calls add_numbers(1, 4), add_numbers(2, 5) and add_numbers(3, 6)
result <- mapply(add_numbers, vector1, vector2)
# [1] 5 7 9
dplyr
- filter(): Used for subsetting rows based on conditions
- arrange(): Sorts rows based on one or more variables
- slice(): Chooses rows based on location
- select(): Picks specific columns from a data frame
- mutate(): Creates new variables or modifies existing ones (new columns)
- rename(): Renames variables in a data frame (renames columns)
- summarize(): Generates summary statistics of different variables; it collapses a group into a single row
- %>%: the “pipe” operator is used to connect multiple verb actions together into a pipeline
Syntax
- The first argument is a data frame.
- The subsequent arguments describe what to do with it; columns can be referred to directly, without the $ operator (just use the column names).
- The return result is a new data frame.
# Installing dplyr package
# dependencies = TRUE also installs the packages dplyr itself relies on.
# Spell out TRUE instead of T: T is an ordinary variable that can be reassigned.
install.packages("dplyr", dependencies = TRUE)
# Loading package
library("dplyr")
## filter(): keep only the rows for which every listed condition holds
starwars %>%
  filter(skin_color == "light", eye_color == "brown")
## arrange(): sort rows. Sorting is ascending by default.
starwars %>%
  arrange(height)
## desc() is used for descending order
starwars %>%
  arrange(desc(height))
## Sorting based on multiple columns: ties in height are ordered by mass
starwars %>%
  arrange(height, mass)
## Mixed directions: height descending, then mass ascending
starwars %>%
  arrange(desc(height), mass)
## slice(): choose rows by position (here rows 5 to 10)
starwars %>%
  slice(5:10)
## slice_head() and slice_tail() select the first or last rows
starwars %>%
  slice_head(n = 5)
## slice_sample() randomly selects rows
starwars %>%
  slice_sample(n = 5)
## slice_min() and slice_max() select the rows with the lowest or highest
## values of a variable. Rows with a missing height are dropped first so that
## only known heights are ranked.
starwars %>%
  filter(!is.na(height)) %>%
  slice_max(height, n = 5)
# select(): pick columns by name
starwars %>%
  select(hair_color, skin_color, eye_color)
# A range picks every column from hair_color to eye_color (inclusive)
starwars %>%
  select(hair_color:eye_color)
# Negating the range keeps every column except hair_color to eye_color
starwars %>%
  select(!(hair_color:eye_color))
# Helper functions match on column names; here: names ending with "color"
starwars %>%
  select(ends_with("color"))
# Other choices are starts_with(), matches() and contains()
# rename(): the syntax is new_name = old_name, so this renames the column
# homeworld to home_world
starwars %>%
  rename(home_world = homeworld)
# mutate(): add a new column height_m computed as height / 100
starwars %>%
  mutate(height_m = height / 100)
# If you only want to keep the new variables, use .keep = "none"
# The new column is appended after the existing ones, so it may not be visible
# in the printed output. Moving it to the front with select() fixes that;
# everything() keeps all remaining columns after the ones named first.
starwars %>%
  mutate(height_m = height / 100) %>%
  select(height_m, height, everything())
## summarise(): collapse the data into a single row of summary statistics.
## It is most useful in combination with group_by().
starwars %>%
  summarise(height = mean(height, na.rm = TRUE))
# An example of using summarise with group_by():
# mean height and mass for every species/sex combination.
starwars %>%
  group_by(species, sex) %>%
  # Grouping columns are always kept by select(); naming them explicitly
  # avoids the "Adding missing grouping variables" message.
  select(species, sex, height, mass) %>%
  summarise(
    height = mean(height, na.rm = TRUE),
    mass = mean(mass, na.rm = TRUE),
    # Return an ungrouped result instead of one still grouped by species
    .groups = "drop"
  )