%PDF- <> %âãÏÓ endobj 2 0 obj <> endobj 3 0 obj <>/ExtGState<>/ProcSet[/PDF/Text/ImageB/ImageC/ImageI] >>/Annots[ 28 0 R 29 0 R] /MediaBox[ 0 0 595.5 842.25] /Contents 4 0 R/Group<>/Tabs/S>> endobj ºaâÚÎΞ-ÌE1ÍØÄ÷{òò2ÿ ÛÖ^ÔÀá TÎ{¦?§®¥kuµùÕ5sLOšuY>endobj 2 0 obj<>endobj 2 0 obj<>endobj 2 0 obj<>endobj 2 0 obj<> endobj 2 0 obj<>endobj 2 0 obj<>es 3 0 R>> endobj 2 0 obj<> ox[ 0.000000 0.000000 609.600000 935.600000]/Fi endobj 3 0 obj<> endobj 7 1 obj<>/ProcSet[/PDF/Text/ImageB/ImageC/ImageI]>>/Subtype/Form>> stream
#!/usr/bin/env Rscript library(ggplot2); library(plyr); # get __dirname and load ./_cli.R args = commandArgs(trailingOnly = F); dirname = dirname(sub("--file=", "", args[grep("--file", args)])); source(paste0(dirname, '/_cli.R'), chdir=T); if (is.null(args.options$xaxis) || is.null(args.options$category) || (!is.null(args.options$plot) && args.options$plot == TRUE)) { stop("usage: cat file.csv | Rscript scatter.R [variable=value ...] --xaxis variable variable name to use as xaxis (required) --category variable variable name to use as colored category (required) --plot filename save plot to filename --log use a log-2 scale for xaxis in the plot"); } plot.filename = args.options$plot; # parse options x.axis.name = args.options$xaxis; category.name = args.options$category; use.log2 = !is.null(args.options$log); # parse data dat = read.csv(file('stdin'), strip.white=TRUE); dat = data.frame(dat); # List of aggregated variables aggregate = names(dat); aggregate = aggregate[ ! aggregate %in% c('rate', 'time', 'filename', x.axis.name, category.name) ]; # Variables that don't change aren't aggregated for (aggregate.key in aggregate) { if (length(unique(dat[[aggregate.key]])) == 1) { aggregate = aggregate[aggregate != aggregate.key]; } } # Print out aggregated variables for (aggregate.variable in aggregate) { cat(sprintf('aggregating variable: %s\n', aggregate.variable)); } if (length(aggregate) > 0) { cat('\n'); } # Calculate statistics stats = ddply(dat, c(x.axis.name, category.name), function(subdat) { rate = subdat$rate; # calculate confidence interval of the mean ci = NA; if (length(rate) > 1) { se = sqrt(var(rate)/length(rate)); ci = se * qt(0.975, length(rate) - 1) } # calculate mean and 95 % confidence interval r = list( rate = mean(rate), confidence.interval = ci ); return(data.frame(r)); }); print(stats, row.names=F); if (!is.null(plot.filename)) { p = ggplot(stats, aes_string(x=x.axis.name, y='rate', colour=category.name)); if (use.log2) { p = p + scale_x_continuous(trans='log2'); } p = p + geom_errorbar( aes(ymin=rate-confidence.interval, ymax=rate+confidence.interval), width=.1, na.rm=TRUE ); p = p + geom_point(); p = p + geom_line(); p = p + ylab("rate of operations (higher is better)"); p = p + ggtitle(dat[1, 1]); ggsave(plot.filename, p); }