DBMS - Database Management System

Open Source

  • MySQL  —  RMySQL package
  • PostgreSQL — RPostgreSQL package
  • SQLite — RSQLite package

Licensed

  • Oracle Database — ROracle
  • Microsoft SQL Server

     RMySQL package – implementation; DBI package – interface
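Whatever the DBMS, the R code is written against DBI; only the driver passed to dbConnect() changes. A minimal sketch with SQLite (assuming the RSQLite package is installed), which needs no server because the database lives in a file or in memory:

library(DBI)
# The RSQLite driver; ":memory:" creates a throwaway in-memory database
con_lite <- dbConnect(RSQLite::SQLite(), dbname = ":memory:")
dbWriteTable(con_lite, "mtcars", mtcars)
dbListTables(con_lite)
dbDisconnect(con_lite)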

Importing from DB - Relational DBs

install.packages("RMySQL")

# Load the DBI package
library(DBI)

# library(RMySQL) is not required; DBI dispatches to the RMySQL driver

# Edit dbConnect() call
con <- dbConnect(RMySQL::MySQL(),
                 dbname = "tweater",
                 host = "courses.csrrinzqubik.us-east-1.rds.amazonaws.com",
                 port = 3306,
                 user = "student",
                 password = "datacamp")

# Returns a DBI (RMySQL) connection object.

dbListTables(con)

dbReadTable(con, "tablename")

table_names <- dbListTables(con)

# Import all tables
tables <- lapply(table_names, dbReadTable, conn = con)
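To keep track of which data frame came from which table, the list can be named (a small addition, not part of the course code):

# Name each imported data frame after its table
names(tables) <- table_names
str(tables, max.level = 1)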

Example1:
employees <- dbReadTable(con, "employees")
subset(employees,
       subset = started_at > "2012-09-01",
       select = name)

Example2:
products <- dbReadTable(con, "products")
subset(products, subset = contract == 1)

Example1:
dbGetQuery(con, "SELECT name FROM employees WHERE started_at > \"2012-09-01\"")
Example2:
dbGetQuery(con, "SELECT * FROM products WHERE contract = 1")
**Note: contract == 1 is R syntax (as in subset()); in SQL the comparison operator is a single =, so the query uses contract = 1.
Example3:
SELECT age, country FROM users WHERE gender = "male" AND married = 1
Example4:

CHAR_LENGTH() returns the number of characters in a string.

short <- dbGetQuery(con, "SELECT id, name FROM users WHERE CHAR_LENGTH(name) < 5")
SELECT name, post
  FROM users INNER JOIN tweats ON users.id = user_id
    WHERE date > "2015-09-19"
SELECT post, message
  FROM tweats INNER JOIN comments ON tweats.id = tweat_id
    WHERE tweat_id = 77
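Multi-line SQL such as the joins above goes into dbGetQuery() as one character string; a sketch assuming the same con and the course's users/tweats tables:

# Run the first join from R; line breaks inside the string are fine
latest <- dbGetQuery(con,
  "SELECT name, post
     FROM users INNER JOIN tweats ON users.id = user_id
    WHERE date > \"2015-09-19\"")
latest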

res <- dbGetQuery(con, "SELECT * FROM comments WHERE user_id > 4")
# -------- OR -------- #

# Connect to the database
library(DBI)
con <- dbConnect(RMySQL::MySQL(),
                 dbname = "tweater",
                 host = "courses.csrrinzqubik.us-east-1.rds.amazonaws.com",
                 port = 3306,
                 user = "student",
                 password = "datacamp")
# Send query to the database
#dbReadTable(con, "comments")
res <- dbSendQuery(con, "SELECT * FROM comments WHERE user_id > 4")
# Fetch the first two records of the result set
dbFetch(res, n = 2)
# Clear res
dbClearResult(res)
dbDisconnect(con)

#######   Send - Fetch - Clear   #######
res <- dbSendQuery(con, "SELECT * FROM products WHERE contract = 1")
# Fetch the result one record at a time until the result set is exhausted
while (!dbHasCompleted(res)) {
  chunk <- dbFetch(res, n = 1)
  print(chunk)
}
dbClearResult(res)
dbDisconnect(con)
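Instead of looping one record at a time, dbFetch() can pull everything that is left in one go; a sketch assuming an open connection con, using n = -1, DBI's convention for "all remaining rows":

res <- dbSendQuery(con, "SELECT * FROM products WHERE contract = 1")
all_rows <- dbFetch(res, n = -1)   # n = -1: fetch all remaining records
dbClearResult(res)
dbDisconnect(con)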

Importing data from the WEB

# Load the readr package
library(readr)
# Import the csv file: pools
url_csv <- "http://s3.amazonaws.com/assets.datacamp.com/production/course_1478/datasets/swimming_pools.csv"
pools<-read_csv(url_csv)
# Import the txt file: potatoes
url_delim <- "http://s3.amazonaws.com/assets.datacamp.com/production/course_1478/datasets/potatoes.txt"
potatoes<-read_tsv(url_delim)
# Print pools and potatoes
pools
potatoes
# https URL to the swimming_pools csv file.
url_csv <- "https://s3.amazonaws.com/assets.datacamp.com/production/course_1478/datasets/swimming_pools.csv"
# Import the file using read.csv(): pools1
pools1<- read.csv(url_csv)
names(pools1)
# Load the readr package
library(readr)
# Import the file using read_csv(): pools2
pools2<- read_csv(url_csv)
# Print the structure of pools1 and pools2
str(pools1)
str(pools2)
# Load the readxl and gdata package
library(readxl)
library(gdata)
# Specification of url: url_xls
url_xls <- "http://s3.amazonaws.com/assets.datacamp.com/production/course_1478/datasets/latitude.xls"
# Import the .xls file with gdata: excel_gdata
excel_gdata<- read.xls(url_xls) #gdata
# Download file behind URL, name it local_latitude.xls
download.file(url_xls, "local_latitude.xls")
# Import the local .xls file with readxl: excel_readxl
excel_readxl<- read_excel("local_latitude.xls")
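One caveat (not from the course): on Windows, download.file() defaults to text mode, which can corrupt binary files such as .xls; passing mode = "wb" avoids this:

# Download in binary mode so the .xls file is not mangled on Windows
download.file(url_xls, "local_latitude.xls", mode = "wb")
excel_readxl <- read_excel("local_latitude.xls")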
# https URL to the wine RData file.
url_rdata <- "https://s3.amazonaws.com/assets.datacamp.com/production/course_1478/datasets/wine.RData"
# Download the wine file to your working directory
download.file(url_rdata, "wine_local.RData")
# Load the wine data into your workspace using load()
load("wine_local.RData")
# Print out the summary of the wine data
summary(wine)
# Another way to load a remote RData file;
# this does not save the RData file locally
load(url(url_rdata))
# Load the httr package
library(httr)

# Get the url, save response to resp
url <- "http://www.example.com/"
resp<-GET(url)
# Print resp
resp
# Get the raw content of resp: raw_content
raw_content<-content(resp, as="raw")
char_content<-content(resp, as="text")
install.packages("xml2")
library(xml2)
list_content<-content(resp, as="parsed")
# Print the head of raw_content
head(char_content)
head(list_content)
head(raw_content)
httr provides a convenient function, GET(), to execute this GET request. The result is a response object that gives easy access to the status code, the content type and, of course, the actual content. You can extract the content from the request using the content() function. At the time of writing, there are three ways to retrieve this content: as a raw object, as a character vector, or as an R object such as a list. If you don't tell content() how to retrieve the content through the as argument, it will try to figure out which type is most appropriate based on the content type.
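Besides the body, the response object carries metadata; a short sketch (reusing resp from above) with httr helpers for status and content type:

library(httr)
status_code(resp)   # e.g. 200 when the request succeeded
http_type(resp)     # MIME type of the body, e.g. "text/html"
headers(resp)       # full list of response headers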
# httr is already loaded
# Get the url
url <- "http://www.omdbapi.com/?apikey=ff21610b&t=Annie+Hall&y=&plot=short&r=json"
resp<-GET(url)
# Print resp
resp
# Print content of resp as text
content(resp, as="text") #get text content of resp
# Print content of resp
#R figures out automatically that you're dealing with a JSON,
#and converts the JSON to a named R list.
content(resp)
httr converts JSON response body automatically to an R list 

JSON is a well-structured file format for communication with web APIs.

API – Application Programming Interface – a set of routines for building software. Web API – an interface for adding data to or getting data from a server, using HTTP verbs: GET and others.
JSON is built on two structures: objects and arrays. A JSON object is a collection of {name:value, ...} pairs.
fromJSON() {jsonlite} converts JSON character strings into structured R lists.
library(jsonlite)
# create a JSON
wheat_json <- '{"name":"Chateau Migraine", "year":1997, "proteins":12.4, "color":"red", "awarded":false}'
# Convert wheat_json into a list wheat
wheat<-fromJSON(wheat_json)
Quandl API, where you can fetch all sorts of financial and economic data:
quandl_url <- "https://www.quandl.com/api/v3/datasets/WIKI/FB/data.json?auth_token=i83asDsiWUUyfoypkgMz"
# Import Quandl data: quandl_data
quandl_data<- fromJSON(quandl_url)
OMDb API, where you can fetch data about movies:
url_sw4 <- "http://www.omdbapi.com/?apikey=ff21610b&i=tt0076759&r=json"
sw4 <- fromJSON(url_sw4)
# Print out the Title element of the list
sw4$Title
library(jsonlite)
json1 <- '[1, 2, 3, 4, 5, 6]'
json2 <- '{"a": [1, 2, 3],"b":[4,5,6]}'
json3 <- '[[1, 2], [3, 4]]'
json4 <- '[{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}]'
fromJSON(json4)
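For reference (not part of the original snippet), jsonlite simplifies these structures to familiar R types by default:

fromJSON(json1)   # numeric vector: 1 2 3 4 5 6
fromJSON(json2)   # named list: $a = 1 2 3, $b = 4 5 6
fromJSON(json3)   # 2 x 2 matrix
fromJSON(json4)   # data frame with columns a and b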
url_csv <- "http://s3.amazonaws.com/assets.datacamp.com/production/course_1478/datasets/water.csv"
# Import the .csv file located at url_csv
water<- read.csv(url_csv, stringsAsFactors=FALSE)
# Convert the data frame to JSON
water_json <- toJSON(water)
pretty_json<- toJSON(mtcars,pretty=TRUE)
#pretty_json<- prettify(toJSON(mtcars))

# Minify pretty_json: mini_json
mini_json<- minify(pretty_json)

# Mini
{"a":1,"b":2,"c":{"x":5,"y":6}}

# Pretty
{
  "a": 1,
  "b": 2,
  "c": {
    "x": 5,
    "y": 6
  }
}