# R notes

``````df[-(1:5), ] # Omit first 5 rows of df
df[, -4]     # Omit fourth column of df
``````
``````lapply(X, FUN, ...)
sapply(X, FUN, ..., simplify = TRUE, USE.NAMES = TRUE)
vapply(X, FUN, FUN.VALUE, ..., USE.NAMES = TRUE)
replicate(n, expr, simplify = "array")
simplify2array(x, higher = TRUE)
``````
``````mean(c(1:9, NA), trim=0.1, na.rm = TRUE)
``````
``` ```
``` ```
``` ```
``` ```
``` ```
``` ```
``` ```
``` ```
``` Environments If a variable name is not defined inside a function, R will look one level up. If a name also isn’t defined there, an error occurs: x=1 rm(x) #remove a definition of x g <- function() { y <- 1 c(x,y) } Functions should never depend on variables other than their arguments (such variables could be changed outside). Each call to a function has its own clean environment: j <- function(){ if(!exists("a")) { a <- 1 } else { a <- a+1 } print (a) } Two types of vectors in R: Atomic vectors (homogeneous) of 6 types: logical; integer; double; character; complex; raw. Lists (heterogeneous recursive vectors) – can contain other lists. Every vector has 2 key properties: typeof(), length(). Additional metadata can be added to a vector through attributes. Creating and subsetting a list: x1 <- list(a, b, c(TRUE, FALSE), 1:10) x2 <- list(x1, a = list(-1, -3), b = 1:3, c = "a string", d = pi) x[i] - extracts a sublist x[[i]], x$i - extract an element (remove a level of hierarchy) >str(x2[2]) List of 1 $ a:List of 2 ..$ : num -1 ..$ : num -3 >str(x2[[2]]) List of 2 $ : num -1 $ : num -3 Missing values typeof(NULL) is "NULL" #indicates the absence of a vector length(NULL) is 0 typeof(NA) is "logical" #indicates the absence of a value in a vector length(NA) is 1 Any basic math or logical comparison with NA always results in NA: NA + 10 # NA NA > 5 # NA 10 == NA # NA NA == NA # NA Use seq_along() instead of 1:ncol(df) as a loop iterator to provide handling for empty cases: for (i in seq_along(empty_df)) { print(median(empty_df[[i]])) } x <- c(1:10, NA) rescale01 <- function(x) { rng <- range(x, na.rm = TRUE) return((x - rng[1]) / (rng[2] - rng[1])) } # Count how many elements are missing in both x and y both_na <- function(x,y) sum(is.na(x) & is.na(y)) A function that calculates a confidence interval for a population mean: mean_ci <- function(x, level = 0.95) { if (length(x) == 0) { warning("`x` was empty", call. 
= FALSE) return(c(-Inf, Inf)) } else { se <- sd(x) / sqrt(length(x)) alpha <- 1 - level return(mean(x) + se * qnorm(c(alpha / 2, 1 - alpha / 2))) } } >cat(LETTERS[1:4]) #a function designed just to display output A B C D > paste(LETTERS[1:4]) [1] "A" "B" "C" "D" #replace NAs: x[is.na(x)] <- replacement #sampling sample(c(1:5, NA), 100, replace = TRUE) #rnorm() #qnorm() #Allocation output <- numeric(ncol(df)) {purrr} map functions 1. Loop over a vector .x 2. Apply the function .f to each element 3. Return the results in the same order as the input: map() returns a list or data frame map_lgl() returns a logical vector map_int() returns an integer vector map_dbl() returns a double vector map_chr() returns a character vector map_dbl(df, mean, trim = 0.5, na.rm = TRUE) sapply(df, mean) ########################################## col_summary <- function(df, fun) { output <- numeric(ncol(df)) for (i in seq_along(df)) { output[[i]] <- fun(df[[i]]) } output } col_summary(df, mean) ########################################## library(purrr) map_dbl(df, mean) # Find the columns that are numeric map_lgl(df, is.numeric) # Find the type of each column map_chr(df, typeof) # Find a summary of each column map(df, summary) # Find the 5th percentile of each column, excluding missing values map_dbl(df, quantile, probs = c(0.05), na.rm = TRUE) Using an anonymous function map(cyl, function(df) lm(mpg~wt, data=df)) Using a formula map(cyl, ~ lm(mpg ~ wt, data = .)) Using a string list_of_results <- list( list(a = 1, b = "A"), list(a = 2, b = "C"), list(a = 3, b = "D") ) #pull out the "a" element from every entry with string shortcut map(list_of_results, "a") # Save the result of linear model models<- map(cyl, ~ lm(mpg ~ wt, data = .)) # Use map and coef to get the coefficients for each model: coefs coefs<- map(models, coef) # Use string shortcut to extract the wt coefficient map(coefs, "wt") Using a numeric vector coefs <- map(models, coef) # use map_dbl with the numeric shortcut to pull out the 
second element map_dbl(coefs,2) Pipe operator: %>% x %>% f(y) is another way of writing f(x, y) #Downloading the HTML files at each URL urls <- list( example = "http://example.org", rproj = "http://www.r-project.org", asdf = "http://asdfasdasdkfjlda" ) #generates an error: map(urls, readLines) #generates a list with a result and an error for each element map(urls, safely(readLines)) # Extract the result from one of the successful elements (html$example)$result # Extract the error from the element that was unsuccessful html$asdf$error #transpose() turns a list-of-lists "inside-out": # Extract the results: transpose(html)$result # Extract the errors: transpose(html)$error # Initialize some objects safe_readLines <- safely(readLines) html <- map(urls, safe_readLines) res <- transpose(html)[["result"]] errs <- transpose(html)[["error"]] # Create a logical vector is_ok is_ok <- (map_lgl(errs, is_null)) # Extract the successful results res[is_ok] # Find the URLs that were unsuccessful urls[!is_ok] #3 calls to rnorm() rnorm(5) rnorm(10) rnorm(20) #one call of map() map(list(5,10,20), rnorm) #rnorm(n, mean=0, sd=1) rnorm(5, mean=1) rnorm(10, mean=5) rnorm(20, mean=10) map2() to iterate over 2 arguments map2(list(5,10,20), list(1,5,10), rnorm) pmap() to iterate over many arguments #pmap(.l, .f, ...) rnorm(5, mean=1, sd=0.1) rnorm(10, mean=5, sd=0.5) rnorm(20, mean=10, sd=0.1) pmap(list(n = list(5, 10, 20), mean = list(1,5,10), sd = list(0.1, 0.5, 0.1)), rnorm) invoke_map() to iterate over functions and arguments #invoke_map(.f, .x = list(NULL), ...) rnorm(5) runif(5) rexp(5) invoke_map(list(rnorm, runif, rexp), n=5) — walk() operates just like map() except it’s designed for functions that don’t return anything. — Use walk() for functions with side effects like printing, plotting or saving. — walk() functions return the object you passed to them (so they can easily be used in pipelines). 
library(ggplot2) plots <- split(mtcars, mtcars$cyl) %>% map(~ggplot(., aes(mpg, wt)) + geom_point()) paths <- paste0(names(plots), ".pdf") #take a description of a plot and save it to disk walk2(paths, plots, ggsave) #take a peek at the contents of x and find the lengths, in one line lengths <- x %>% walk(print) %>% map_dbl(length) rexp() runif() Robust Functions Three main problems: Type-unstable functions; Non-standard evaluation; Hidden arguments —————— Type inconsistent: the type of the return object depends on the input. E.g. df[,1] – sometimes returns a data frame, sometimes – a vector. Two common solutions for [ ]: #1 #Setting drop = FALSE forces single bracket subsetting to be type-consistent last_row <- function(df){ df[nrow(df), , drop=FALSE] #returns data frame #df[nrow(df),] #returns int } df <- data.frame(x=1:3) last_row(df) #2 #Subsetting the data frame like a list df[x] Note: all functions in {purrr} are type-consistent. E.g. map() is a type-consistent function - always returns a list. col_classes <- function(df) { class_list <- map(df, class) # Use map_chr() to extract first element in class_list map_chr(class_list,1 ) } flatten_chr() takes a list and removes its hierarchy and will either return a character vector or an error. ------------------------------- Non-standard evaluation - functions which don't use the usual look-up rules for variables - may cause problems when used inside your own functions. For possible solutions read Hadley Wickham's vignette. #disp is evaluated inside mtcars #It does not exist in the global environment subset(mtcars, disp > 450) Pure functions: Output depends only on the inputs. Don't affect the outside environment except through their return value. Hidden arguments are function inputs that may be different for different users or sessions. E.g. 
argument defaults that depend on global options. Global options are settings that affect the entire R session. Getting and setting options: #get a list with all values of global options options() #An option that controls how many digits to print for numeric values getOption("digits") options(digits = 5) getOption("stringsAsFactors") ```
``` ```
``` ```
``` ```
``` ```
``` ```
``` ```
``` ```
``` Search for: ```
``` ```
``` Recent Posts WordPress Resources at SiteGround Hello world! Archives June 2018 June 2020 M T W T F S S « Jun     1234567 891011121314 15161718192021 22232425262728 2930   ©2020 * WebMust var ElementorProFrontendConfig = {"ajaxurl":"https:\/\/webmust.org\/wp-admin\/admin-ajax.php","nonce":"bd472587bb","shareButtonsNetworks":{"facebook":{"title":"Facebook","has_counter":true},"twitter":{"title":"Twitter"},"google":{"title":"Google+","has_counter":true},"linkedin":{"title":"LinkedIn","has_counter":true},"pinterest":{"title":"Pinterest","has_counter":true},"reddit":{"title":"Reddit","has_counter":true},"vk":{"title":"VK","has_counter":true},"odnoklassniki":{"title":"OK","has_counter":true},"tumblr":{"title":"Tumblr"},"delicious":{"title":"Delicious"},"digg":{"title":"Digg"},"skype":{"title":"Skype"},"stumbleupon":{"title":"StumbleUpon","has_counter":true},"telegram":{"title":"Telegram"},"pocket":{"title":"Pocket","has_counter":true},"xing":{"title":"XING","has_counter":true},"whatsapp":{"title":"WhatsApp"},"email":{"title":"Email"},"print":{"title":"Print"}},"facebook_sdk":{"lang":"en_US","app_id":""}}; var elementorFrontendConfig = {"environmentMode":{"edit":false,"wpPreview":false},"is_rtl":false,"breakpoints":{"xs":0,"sm":480,"md":768,"lg":1025,"xl":1440,"xxl":1600},"version":"2.5.16","urls":{"assets":"https:\/\/webmust.org\/wp-content\/plugins\/elementor\/assets\/"},"settings":{"page":[],"general":{"elementor_global_image_lightbox":"yes","elementor_enable_lightbox_in_editor":"yes"}},"post":{"id":495,"title":"R notes","excerpt":""}}; ```