Plotting Exploratory Data Analysis and Visualization Class Skills in RStudio
Creating a Branch and Generating Graphs with RStudio (ggplot2)
For more on the class, check out this earlier blog post: Exploratory Data Analysis and Visualization Course.
Some stuff to get started:
List of steps:
- fork
- clone
- checkout -b
- mkdir
- -> create file
- add
- commit
- push
- read.csv()
- merge upstream/gh-pages
The Plot
The Code
library(RCurl)
library(ggplot2)
# Download the survey responses (a Google Sheet exported as CSV) as one string.
# NOTE(review): ssl.verifypeer = FALSE turns off TLS certificate verification
# for this request; it is left unchanged here, but confirm it is still needed.
con <- getURL("https://docs.google.com/spreadsheets/d/19a0O6C14zButypjcnWictvKWeyPjPjQdrps-UXzPDf8/export?format=csv", ssl.verifypeer = FALSE)
# Parse through `text =` so that read.csv() creates and closes its own text
# connection; the earlier textConnection(con) was opened and never closed.
# check.names = FALSE keeps the original column headers (including spaces).
df <- read.csv(text = con, stringsAsFactors = FALSE, check.names = FALSE)
# Interactive inspection of the parsed data.
View(df)
str(df)
names(df)
#df <- gsub( "OpenRefine" , "Openrefine" , df)
# Strip parenthesised qualifiers from labels, e.g.
#   "shell (terminal / command line)" -> "shell"
#
# The previous pattern "\\(.*\\)$/" required a literal "/" after the
# end-of-string anchor, so it could never match and the input was returned
# unchanged.
#
# x: character vector of labels (or of ", "-separated selections).
# Returns a character vector of the same length in which every "( ... )" group,
# together with the whitespace directly in front of it, has been removed.
removeParentheses <- function(x) {
  # [^)]* keeps each match inside a single pair of parentheses, so several
  # qualifiers in one string are removed one by one instead of greedily
  # swallowing everything between the first "(" and the last ")".
  gsub("[[:space:]]*\\([^)]*\\)", "", x)
}
# All answer choices of the multiple-response question, written as one
# ", "-separated string and split into a character vector.
mrOptions <- unlist(strsplit("Excel, R, Stata, D3, Gephi, ggplot2, lattice, SQL, git / Github, SPSS, shell (terminal / command line), regular expressions (grep), Rstudio, JSON, Python, Sweave/knitr, Processing (language), C/C++, Leaflet, CartoDB, GeoJSON, node/npm, go language, ruby, LaTeX, Heroku, Make, Pandas, Julia, non-git version control, XML, Web: html css js, vagrant/virtualbox, amazon web services, dropbox, google drive (formerly docs), OpenRefine (formerly Google refine), Pair programming", ", ", fixed = TRUE))
# Pass the labels through removeParentheses(), then respell "OpenRefine" as
# "Openrefine" in the result.
mrOptions <- gsub("OpenRefine", "Openrefine", removeParentheses(mrOptions))
# Show the final option labels.
mrOptions
# Convert a multiple-response answer into one TRUE/FALSE flag per option.
#
# selected: character; the ", "-separated items that were ticked.
# options:  character vector of all possible option labels.
# Returns a logical vector named by `options`; if `selected` has several
# elements the result is a matrix with one row per element and one column
# per option.
#
# Options are compared against whole selection items instead of being searched
# for as substrings: substring matching reported "R" for every answer that
# contained "Rstudio" and "JSON" for every answer that contained "GeoJSON".
expandSelections <- function(selected, options) {
  selected <- removeParentheses(selected)
  # One character vector of ticked items per element of `selected`.
  picked <- lapply(strsplit(selected, ", ", fixed = TRUE), trimws)
  vapply(
    options,
    function(option) {
      # trimws() on both sides so stray padding does not defeat the exact match.
      vapply(picked, function(items) trimws(option) %in% items, logical(1))
    },
    logical(length(picked))
  )
}
# gridLevels <- c("None", "A little", "Confident", "Expert")
# makeOrderedFactor <- function(col, levels){
# return (factor(col, levels=levels, labels=levels, ordered=TRUE))
# }
# Build a logical matrix from the "Baseline experience" column: one row per
# element of the column, one column per entry of mrOptions. "OpenRefine" is
# respelled "Openrefine" first, mirroring what was done to mrOptions.
baselineAnswers <- gsub("OpenRefine", "Openrefine", df[["Baseline experience"]])
mrLogical <- t(sapply(baselineAnswers, expandSelections, options = mrOptions))
View(mrLogical)

# Total number of TRUE cells, then the number of TRUE cells per option.
sum(mrLogical)
colSums(mrLogical)

# Quick looks at the per-option counts.
barplot(colSums(mrLogical))
qplot(colSums(mrLogical))

# Final chart: horizontal bars, with options ordered by their count.
plotdf <- data.frame(
  count = colSums(mrLogical),
  item = factor(colnames(mrLogical))
)
plotdf$item <- reorder(plotdf$item, plotdf$count)
ggplot(plotdf, aes(x = item, y = count)) +
  geom_bar(stat = "identity") +
  coord_flip()