This morning I finished a refactoring that I'd started on Friday when I realized that the UX of the web pages I was making (to be thrown away) were really all wrong. I was doing the more direct scheme - loading a lot of data into memory and then manipulating it quickly to render different graphs. The problem with this is that the data sets are really very large, and the load times for them are longer than you'd want to sit for.
Once of the guys on the team was saying "Break it up into a bunch of small requests." And while I could see that approach, I thought that the overhead of all the calls was going to really kill performance. I never even really considered it.
But about 4:00 pm on Friday, when I was really very frustrated with the code I was working on for that page, I decided to give it a go. What made most sense to me was to break the requests into a few stages:
- Get the list of all runs - based on the selected database, get the division/timestamp pairs for all runs in that database. We'll be able to parse them next, but this is a nice, short little method.
- Parse the executions for divisions and timestamps - take the division/timestamp data for all runs in a database and create a data structure where the list of timestamps will be stored in a descending order for a given division.
- Set HTML run options for a given division - when the user selects a division, take the parsed data and create the drop-down for the runtimes for that guy.
- Query for the specific data - taking the data from the options - the database, the division, the timestamp, hit CouchDB for the exact data we need to visualize. In many cases this is less than 100 documents.
- Parse the documents - once we have the targeted data from CouchDB, parse it into Google DataTable or ZingChart series.
- Render the data - last step.
I was surprised to see that the resulting code was smaller than I'd had. The parsing of the data structures was really a lot more than I thought. Starting at the top of the list, the code to get the list of all runs is really simply:
function reload_executions() {
// hit CouchDB for the view of all executions it knowns about
var svr_opt = document.getElementById('server_opt');
var url = svr_opt.value + '/_design/general/_view/executions?' +
opts + '&callback=?';
$.getJSON(url, function(data) {
parse_execution_tags(data);
});
}
Once again, jQuery really helps me out. Next, I need to parse this data into a structure of all the runs by division:
function parse_execution_tags(data) {
divisions = new Array();
runtimes = new Object();
for(var i in data.rows) {
// get the execution_tag and exclude the very early ones
var exec_tag = data.rows[i].key;
if (!/-\D+$/i.test(exec_tag) || (exec_tag.substring(0,10) < weekAgo)) {
continue;
}
// now get the timestamp and division from the execution_tag
var runtime = exec_tag.replace(/-\D+/g, '');
var division = exec_tag.replace(/^.*\.\d\d\d-/g, '');
if (typeof(runtimes[division]) == 'undefined') {
runtimes[division] = new Array();
divisions.push(division);
}
runtimes[division].push(runtime);
}
// sort the divisions and create the contents of the drop-down
if (divisions.length > 0) {
divisions.sort();
var div_opt = document.getElementById('division_opt');
div_opt.options.length = 0;
for (var d in divisions) {
div_opt.options[div_opt.options.length] =
new Option(divisions[d], divisions[d]);
}
}
// given the default division, load up the run times we just parsed
set_runs_for_division(divisions[0]);
}
where I'd created the variable weekAgo to be able to let me know what the "recent" data was:
// get the date a week ago formatted as YYYY-MM-DD
var when = new Date();
when.setDate(when.getDate() - 7);
var weekAgo = when.getFullYear()+'-'
+('0'+(when.getMonth()+1)).substr(-2,2)+'-'
+('0'+when.getDate()).substr(-2,2);
Once the data is all parsed into the structures we can then build up the drop down for the runs for a selected division with the function:
function set_runs_for_division(division) {
division = (typeof(division) !== 'undefined' ? division :
document.getElementById('division_opt').value);
runtimes[division].sort();
runtimes[division].reverse();
var run_opt = document.getElementById('run_opt');
run_opt.options.length = 0;
for (var i in runtimes[division]) {
var tag = runtimes[division][i];
run_opt.options[run_opt.options.length] = new Option(tag, tag);
}
// at this point, call back to the the data we need, and then render it
reload();
}
Calling to get the actual data is pretty simple:
function reload() {
// hit CouchDB for the view we need to process
var svr_opt = document.getElementById('server_opt');
var view_opt = document.getElementById('view_opt');
var run_opt = document.getElementById('run_opt');
var div_opt = document.getElementById('division_opt');
var et = run_opt.value + '-' + div_opt.value;
var url = svr_opt.value + '/' + view_loc + view_opt.value + '?' +
'startkey=' + JSON.stringify([et,{}]) +
'&endkey=' + JSON.stringify([et]) + opts + '&callback=?';
$.getJSON(url, function(data) {
var tbl = parse_series(data);
render(tbl);
});
}
but parsing it into a Google DataTable is not nearly as simple. The code is complicated by the different requests we need to properly create:
function parse_series(data) {
// now put all the data into an object keyed by the execution_tag
var view_opt = document.getElementById('view_opt');
var table = new google.visualization.DataTable();
table.addColumn('string', 'Division');
switch (view_opt.value) {
case 'merchants_by_existing_merchant':
table.addColumn('number', 'Deals');
break;
case 'merchants_by_research_ranking':
table.addColumn('number', 'Rank');
break;
case 'merchants_by_status':
table.addColumn('string', 'Status');
break;
case 'merchants_by_rep':
table.addColumn('string', 'Rep SF ID');
break;
}
table.addColumn('string', 'Merchant');
table.addColumn('number', 'Sales Value');
for(var i in data.rows) {
var row = data.rows[i];
var name = (row.value.name.length > 60 ?
row.value.name.substring(0,60)+'...' : row.value.name);
var table_row = new Array();
table_row.push(row.value.division);
switch (view_opt.value) {
case 'merchants_by_existing_merchant':
case 'merchants_by_research_ranking':
case 'merchants_by_status':
case 'merchants_by_rep':
table_row.push(row.key[1]);
break;
}
table_row.push(name);
table_row.push(row.value.sales_value);
table.addRow(table_row);
}
// now let's apply the formatter to the sales value column
var fmt = new google.visualization.NumberFormat(sv_format);
switch (view_opt.value) {
case 'merchants_by_existing_merchant':
case 'merchants_by_research_ranking':
case 'merchants_by_status':
case 'merchants_by_rep':
fmt.format(table, 3);
break;
default:
fmt.format(table, 2);
break;
}
return table;
}
but the rendering is very simple:
function render(tbl) {
var dest = document.getElementById('table_div');
var table = new google.visualization.Table(dest);
table.draw(tbl, table_config);
}
When I put it all together I was amazed to learn that the hits were exceptionally fast. The page is far more responsive, and in short - I could not possibly have been more wrong. The human lag is sufficient to make the calls invisible, and the sluggishness of the memory load on the old version was horrible. This is a far better solution.
I'm going to remember this for the future.