Note_Tech

All technological notes.


Project maintained by simonangel-fong Hosted on GitHub Pages — Theme by mattgraham

R - Data Mining

Back


Data Mining


Data mining tasks


Example:

library(jsonlite)
library(curl)
# Location of the Eclipse bug_status JSON file (MSR 2013 bug dataset, v02).
url <- "https://raw.githubusercontent.com/ansymo/msr2013-bug_dataset/master/data/v02/eclipse/bug_status.json"

# Fetch the document behind `url` and parse it into R objects.
ds <- jsonlite::fromJSON(txt = url)

# Keep only the `bug_status` element of the parsed document.
bug_list <- ds[["bug_status"]]

# How many tickets are there in the bug_list?
ticket_num <- length(bug_list)
ticket_num

# Flatten every ticket's `what` values into a single vector, once, so both
# answers below are derived from the same data. lapply() always returns a
# list, whereas sapply() may return a vector, a matrix or a list depending
# on how the per-ticket lengths happen to line up.
all_ops <- unlist(lapply(bug_list, function(x) x$what))

# - How many times does each kind of operation occur within the bug_list?
#   (Hint: tabulate the count by using table())
num_op_tb <- table(operation = all_ops)
num_op_tb

# - What is the total number of operations performed across all tickets in
#   the bug_list?
total_num_op <- length(all_ops)
total_num_op

# Find the number of unique users performing operations in the bug_list.
# Users are read from each entry's `who` field; `what` holds the operation,
# so counting its distinct values would give the number of operation types.
# NOTE(review): assumes every history entry carries a `who` field, as in the
# MSR 2013 bug dataset format - confirm against the loaded data.
unique_num_user <- length(unique(unlist(lapply(bug_list, function(x) x$who))))
unique_num_user
library(ggplot2)
# Collect, across all tickets, the `when` value of each entry whose `what`
# is "NEW", flattened into one vector.
year_new <- unlist(
  lapply(bug_list, function(x) x[["when"]][x[["what"]] == "NEW"])
)

# Convert the values to POSIXct using 1970-01-01 as the origin and draw a
# histogram with one bin per calendar year.
hist(as.POSIXct(year_new, origin = "1970-01-01"), breaks = "years")

data_mining_lab01.png


TOP