Creating a Branch, and Generating Graphs with RStudio (ggplot)

For more on the class, check out this earlier blog post: "Exploratory Data Analysis and Visualization Course".

Some stuff to get started:

List of steps:

  1. fork
  2. clone
  3. checkout -b
  4. mkdir
  5. -> create file
  6. add
  7. commit
  8. push
  9. read.csv()
  10. merge upstream/gh-pages

The Plot

questionnaire_plot

The Code

library(RCurl)
library(ggplot2)
# Download the questionnaire responses as CSV text from the Google Sheets
# export URL.
# NOTE(review): ssl.verifypeer = FALSE switches off TLS certificate
# verification for this request. It is kept so the download behaves as before,
# but consider removing it once the local CA bundle is known to work.
con <- getURL("https://docs.google.com/spreadsheets/d/19a0O6C14zButypjcnWictvKWeyPjPjQdrps-UXzPDf8/export?format=csv", ssl.verifypeer = FALSE)

# Parse the downloaded text. Passing it through `text =` lets read.csv() open
# and close its own connection instead of leaving a textConnection() open.
# check.names = FALSE keeps column headers exactly as they appear in the sheet
# (e.g. "Baseline experience", which is looked up by that name later).
df <- read.csv(text = con, stringsAsFactors = FALSE, check.names = FALSE)

# View() opens a data viewer window, so only call it in an interactive session.
if (interactive()) {
  View(df)
}

str(df)


names(df)
# Left disabled on purpose: gsub() coerces its input to character, so it must
# not be applied to the whole data frame.
#df <- gsub( "OpenRefine" , "Openrefine" , df)

# Strip parenthesised notes from survey labels.
#
# Removes every "(...)" group, together with any whitespace directly in front
# of it, e.g. "Processing (language)" becomes "Processing". It works on single
# labels and on whole comma-separated responses alike.
#
# x: character vector (anything else is coerced to character by gsub()).
# Returns: character vector of the same length as x.
removeParentheses <- function(x) {
  # The earlier pattern ended in "$/", which can never match because nothing
  # can follow the end-of-string anchor, so nothing was ever removed.
  # "[^)]*" keeps each match inside one pair of parentheses, so text between
  # two separate groups in the same string is preserved.
  gsub("\\s*\\([^)]*\\)", "", x)
}
# Answer options of the multiple-response question, in questionnaire order.
# The comma-separated list is split into individual labels, passed through
# removeParentheses(), and "OpenRefine" is respelled "Openrefine" (the same
# respelling is applied to the responses before they are matched).
mrOptions <- gsub(
  "OpenRefine",
  "Openrefine",
  removeParentheses(
    unlist(strsplit("Excel, R, Stata, D3, Gephi, ggplot2, lattice, SQL, git / Github, SPSS, shell (terminal / command line), regular expressions (grep), Rstudio, JSON, Python, Sweave/knitr, Processing (language), C/C++, Leaflet, CartoDB, GeoJSON, node/npm, go language, ruby, LaTeX, Heroku, Make, Pandas, Julia, non-git version control, XML, Web: html css js, vagrant/virtualbox, amazon web services, dropbox, google drive (formerly docs), OpenRefine (formerly Google refine), Pair programming", ", "))
  )
)

# Show the cleaned option labels.
mrOptions

# Flag which answer options were ticked in a multiple-choice response.
#
# selected: character vector of responses; each response is a comma-separated
#           list of the options that were ticked.
# options:  character vector of option labels, cleaned the same way as the
#           responses (i.e. already passed through removeParentheses()).
#           Labels must not contain commas themselves.
# Returns:  for a single response, a logical vector named by `options`;
#           for several responses, a logical matrix with one row per response
#           and one column per option.
expandSelections <- function(selected, options) {
  cleaned <- removeParentheses(selected)
  # Compare whole comma-separated entries instead of searching for substrings:
  # a substring search reports "R" for anyone who ticked "Rstudio", and "JSON"
  # for anyone who ticked "GeoJSON".
  ticked <- lapply(strsplit(cleaned, ",", fixed = TRUE), trimws)
  vapply(
    options,
    function(option) {
      vapply(ticked, function(entries) option %in% entries, logical(1))
    },
    logical(length(ticked))
  )
}
# gridLevels <- c("None", "A little", "Confident", "Expert")
# makeOrderedFactor <- function(col, levels){
#   return (factor(col, levels=levels, labels=levels, ordered=TRUE))
# }
# Build a logical indicator matrix with one row per respondent and one column
# per option in mrOptions. "OpenRefine" is respelled "Openrefine" in the
# responses so that they agree with the labels stored in mrOptions.
# vapply() pins every per-respondent result to a logical vector of
# length(mrOptions), so the result is always a matrix -- even when the column
# is empty, where sapply() would return a list and t() would fail.
mrLogical <- t(vapply(
  gsub("OpenRefine", "Openrefine", df[["Baseline experience"]]),
  expandSelections,
  FUN.VALUE = logical(length(mrOptions)),
  options = mrOptions
))



# Inspect the indicator matrix. View() opens a data viewer window, so only
# call it in an interactive session.
if (interactive()) {
  View(mrLogical)
}

# Total number of ticked boxes across all respondents and options.
sum(mrLogical)

# Number of respondents who ticked each option; computed once and reused by
# every summary and plot below.
itemCounts <- colSums(mrLogical)
itemCounts

# Quick base-graphics overview of the per-option counts.
barplot(itemCounts)

# Histogram of the per-option counts. Written with ggplot() directly because
# qplot() is deprecated as of ggplot2 3.4.0.
ggplot(data.frame(count = itemCounts), aes(x = count)) + geom_histogram()

# One row per option, with the options ordered by how often they were ticked
# so that the bars in the final chart come out sorted.
plotdf <- data.frame(count = itemCounts, item = factor(colnames(mrLogical)))

plotdf$item <- with(plotdf, reorder(item, count))

# Horizontal bar chart: one bar per option, bar length = number of respondents.
ggplot(plotdf, aes(y = count, x = item)) +
  geom_bar(stat = "identity") +
  coord_flip()