Commit 5697872f authored by Kautsar, Satria's avatar Kautsar, Satria
Browse files

Update heatmap visualization (todo: filter heatmap rows and columns to the top...

Update heatmap visualization (todo: filter heatmap rows and columns to the top x most significant members)
parent d924e594
......@@ -2807,7 +2807,7 @@ if __name__=="__main__":
else:
clusterNamesToGenomes[bgc] = genomes.index(identifier)
run_data["input"]["accession"] = [{ "id": "genome_{}".format(i), "label": acc } for i, acc in enumerate(genomes)]
run_data["input"]["accession_newick"] = "()" # todo ...
run_data["input"]["accession_newick"] = [] # todo ...
run_data["input"]["classes"] = [{ "label": cl } for cl in classes ] # todo : colors
run_data["input"]["bgc"] = [{ "id": bgc, "acc": clusterNamesToGenomes[bgc], "class": clusterNamesToClasses[bgc] } for bgc in clusterNames]
......
var clusterfck = (function() {
var module = { exports: {}};
var exports = module.exports;
module.exports = (function() {
var module = { exports: {}};
var exports = module.exports;
var HierarchicalClustering = function(distance, merge, threshold) {
this.distance = distance || clusterfck.EUCLIDEAN_DISTANCE;
this.merge = merge || clusterfck.AVERAGE_LINKAGE;
this.threshold = threshold == undefined ? Infinity : threshold;
}
HierarchicalClustering.prototype = {
cluster : function(items, snapshot, snapshotCallback) {
var clusters = [];
var dists = []; // distances between each pair of clusters
var mins = []; // closest cluster for each cluster
var index = []; // keep a hash of all clusters by key
for(var i = 0; i < items.length; i++) {
var cluster = { canonical: items[i], key: i, index: i, size: 1};
clusters[i] = cluster;
index[i] = cluster;
dists[i] = [];
mins[i] = 0;
}
for(var i = 0; i < clusters.length; i++) {
for(var j = 0; j <= i; j++) {
var dist = (i == j) ? Infinity :
this.distance(clusters[i].canonical, clusters[j].canonical);
dists[i][j] = dist;
dists[j][i] = dist;
if(dist < dists[i][mins[i]])
mins[i] = j;
}
}
var merged = this.mergeClosest(clusters, dists, mins, index);
var i = 0;
while(merged) {
if(snapshotCallback && (i % snapshot) == 0)
snapshotCallback(clusters);
merged = this.mergeClosest(clusters, dists, mins, index);
i++;
}
clusters.forEach(function(cluster) {
// clean up metadata used for clustering
delete cluster.key;
delete cluster.index;
});
return clusters;
},
mergeClosest: function(clusters, dists, mins, index) {
// find two closest clusters from cached mins
var minKey = 0, min = Infinity;
for(var i = 0; i < clusters.length; i++) {
var key = clusters[i].key,
dist = dists[key][mins[key]];
if(dist < min) {
minKey = key;
min = dist;
}
}
if(min >= this.threshold)
return false;
var c1 = index[minKey],
c2 = index[mins[minKey]];
// merge two closest clusters
var merged = { canonical: this.merge(c1.canonical, c2.canonical),
left: c1,
right: c2,
key: c1.key,
size: c1.size + c2.size };
merged.left.dist = 0;
merged.right.dist = dists[key][mins[key]];
clusters[c1.index] = merged;
clusters.splice(c2.index, 1);
index[c1.key] = merged;
// update distances with new merged cluster
for(var i = 0; i < clusters.length; i++) {
var ci = clusters[i];
var dist;
if(c1.key == ci.key)
dist = Infinity;
else if(this.merge == clusterfck.SINGLE_LINKAGE) {
dist = dists[c1.key][ci.key];
if(dists[c1.key][ci.key] > dists[c2.key][ci.key])
dist = dists[c2.key][ci.key];
}
else if(this.merge == clusterfck.COMPLETE_LINKAGE) {
dist = dists[c1.key][ci.key];
if(dists[c1.key][ci.key] < dists[c2.key][ci.key])
dist = dists[c2.key][ci.key];
}
else if(this.merge == clusterfck.AVERAGE_LINKAGE) {
dist = (dists[c1.key][ci.key] * c1.size
+ dists[c2.key][ci.key] * c2.size) / (c1.size + c2.size);
}
else
dist = this.distance(ci.canonical, c1.canonical);
dists[c1.key][ci.key] = dists[ci.key][c1.key] = dist;
}
// update cached mins
for(var i = 0; i < clusters.length; i++) {
var key1 = clusters[i].key;
if(mins[key1] == c1.key || mins[key1] == c2.key) {
var min = key1;
for(var j = 0; j < clusters.length; j++) {
var key2 = clusters[j].key;
if(dists[key1][key2] < dists[key1][min])
min = key2;
}
mins[key1] = min;
}
clusters[i].index = i;
}
// clean up metadata used for clustering
delete c1.key; delete c2.key;
delete c1.index; delete c2.index;
return true;
}
}
var SINGLE_LINKAGE = function(c1, c2) { return c1; };
var COMPLETE_LINKAGE = function(c1, c2) { return c1; };
var AVERAGE_LINKAGE = function(c1, c2) { return c1; };
var EUCLIDEAN_DISTANCE = function(v1, v2) {
var total = 0;
for(var i = 0; i < v1.length; i++)
total += Math.pow(v2[i] - v1[i], 2)
return Math.sqrt(total);
}
var MANHATTAN_DISTANCE = function(v1, v2) {
var total = 0;
for(var i = 0; i < v1.length ; i++)
total += Math.abs(v2[i] - v1[i])
return total;
}
var MAX_DISTANCE = function(v1, v2) {
var max = 0;
for(var i = 0; i < v1.length; i++)
max = Math.max(max , Math.abs(v2[i] - v1[i]));
return max;
}
var hcluster = function(items, distance, merge, threshold, snapshot, snapshotCallback) {
return (new HierarchicalClustering(distance, merge, threshold))
.cluster(items, snapshot, snapshotCallback);
}
clusterfck = {
hcluster: hcluster,
SINGLE_LINKAGE: SINGLE_LINKAGE,
COMPLETE_LINKAGE: COMPLETE_LINKAGE,
AVERAGE_LINKAGE: AVERAGE_LINKAGE,
EUCLIDEAN_DISTANCE: EUCLIDEAN_DISTANCE,
MANHATTAN_DISTANCE: MANHATTAN_DISTANCE,
MAX_DISTANCE: MAX_DISTANCE
};
module.exports = clusterfck;
return module.exports; })();
return module.exports; })()
\ No newline at end of file
......@@ -14,6 +14,7 @@
<script src="../../js/kinetic-v5.1.0.min.js"></script>
<script src="../../js/Chart.bundle.js"></script>
<script src="../../js/biojs-io-newick.min.js"></script>
<script src="../../js/clusterfck-0.1-satria.js"></script>
<script src="../../js/inchlib-1.2.0.1-satria.js"></script>
<script src="./run_data.js"></script>
<div class="overviewpage">
......@@ -72,10 +73,41 @@
<span id="network_max_bgc_in_family" class="inforight"></span>
</div>
<div class="infobar">
<span class="infoleft">BGC Family x Genome Accession absence/presence:</span>
</div>
<div class="infobar">
<div id="abpres_heatmap"></div>
<span style="font-weight: bold;">GCF absence/presence heatmap</span>
</div>
<div class="infobar">
<span class="infoleft" style="font-size: 12px;">
Cluster GCF based on: <br />
<select id="abpres_heatmap-col_clustering">
</select>
</span>
<span class="inforight" style="font-size: 12px;">
Show: <br />
<select id="abpres_heatmap-col_showtop">
<option value="all">All</option>
</select>
most prominent GCFs
</span>
</div>
<div class="infobar">
<span class="infoleft" style="font-size: 12px;">
Cluster Genomes based on: <br />
<select id="abpres_heatmap-row_clustering">
</select>
</span>
<span class="inforight" style="font-size: 12px;">
Show: <br />
<select id="abpres_heatmap-row_showtop">
<option value="all">All</option>
</select>
most prominent Accessions
</span>
</div>
<div class="infobar">
<div id="abpres_heatmap">
</div>
</div>
</div>
</div>
......@@ -174,6 +206,7 @@
}
viewNetwork(0);
});
function viewNetwork(idx) {
$("#network_selection .tablink").removeClass("w3-border-red");
$("#network_selection .tablink").eq(idx).addClass("w3-border-red");
......@@ -190,15 +223,29 @@
$("#network_total_families").html(totalFamily);
$("#network_avg_bgc_in_family").html((sumBGC / totalFamily).toFixed(0));
$("#network_max_bgc_in_family").html(maxBGC);
// build inchlib dataset
var inchlib_data = generateRealHeatmapData(run_data["input"], run_data["networks"][idx]);//generateHeatmapData(100);
// absence/presence heatmap
$("#abpres_heatmap-row_clustering").change(function(handler){ renderHeatmap(idx); });
$("#abpres_heatmap-row_clustering").html("")
.append("<option value='abpres' selected>Family Absence/Presence</option>")
.append("<option value='name'>Accession Label</option>");
for (var i in run_data["input"]["accession_newick"]) {
$("#abpres_heatmap-row_clustering").append("<option value='newick_" + i + "'>" + run_data["input"]["accession_newick"][i]["label"] + "</option>");
}
$("#abpres_heatmap-col_clustering").change(function(handler){ renderHeatmap(idx); });
$("#abpres_heatmap-col_clustering").html("")
.append("<option value='gcf' selected>BGC Similarity Matrix</option>")
.append("<option value='abpres'>Genomes Absence/Presence</option>");
renderHeatmap(idx);
}
function renderHeatmap(idx) {
var inchlib_data = generateRealHeatmapData(run_data["input"], run_data["networks"][idx]);
var inchlib = new InCHlib({
target: "abpres_heatmap",
metadata: false,
heatmap_colors: "OrRd",
draw_row_ids: true,
dendrogram: false,
dendrogram: true,
column_dendrogram: false,
heatmap_part_width: 0.8,
max_column_width: 17,
......@@ -214,7 +261,6 @@
for(i = 0; i < inchlib_data.data.feature_names.length; i++) {
inchlib.settings.ranges.push([0,1,1]);
}
console.log(inchlib_data);
inchlib.read_data(inchlib_data);
inchlib.draw();
}
......@@ -235,9 +281,96 @@
"nodes": {}
}
}
// build row (accession) dendrogram
var acc_tree = parser.parse_newick(input_data["accession_newick"]);
// apply filtering
var included_accessions = [];
var included_gcfs = [];
for (var i = 0; i < run_data["input"]["accession"].length; i++) {
included_accessions.push(i);
}
for (var i = 0; i < family_data.length; i++) {
included_gcfs.push(i);
}
// calculate absence/presence
var acc_features = {};
var acc_labels = {};
var feature_names = [];
for (var i in run_data["input"]["accession"]) {
var accId = run_data["input"]["accession"][i]["id"];
acc_features[accId] = [];
for (var j = 0; j < included_gcfs.length; j++) {
acc_features[accId].push(0);
}
}
for (var i in included_gcfs) {
var fam_data = family_data[included_gcfs[i]];
feature_names.push(fam_data["label"]);
for (var j in fam_data["members"]) {
var bgcId = fam_data["members"][j];
var accId = run_data["input"]["accession"][run_data["input"]["bgc"][bgcId]["acc"]]["id"];
acc_features[accId][i] += 1;
}
}
// update feature array
result["data"]["feature_names"] = feature_names;
// build row (accession) dendrogram
switch ($("#abpres_heatmap-row_clustering").val()) {
case "abpres":
$("#abpres_heatmap-row_showtop").val("all");
$("#abpres_heatmap-row_showtop").attr("disabled", false);
var cluster_items = [];
for (var i in run_data["input"]["accession"]) {
if (included_accessions.indexOf(parseInt(i)) > -1) {
var accId = run_data["input"]["accession"][i]["id"];
cluster_items.push({
"id": accId,
"features": acc_features[accId]
});
acc_labels[accId] = input_data["accession"][i]["label"];
}
}
var cluster_fck = clusterfck.hcluster(cluster_items, function(f1, f2){
var sumpower = 0.00;
for (var i in f1["features"]) {
sumpower += Math.pow(f1["features"][i] - f2["features"][i], 2);
}
return Math.sqrt(sumpower);
}, clusterfck.COMPLETE_LINKAGE);
result["data"]["nodes"] = clusterfck_tree_to_inchlib_nodes(cluster_fck[0]);
break;
case "name":
$("#abpres_heatmap-row_showtop").val("all");
$("#abpres_heatmap-row_showtop").attr("disabled", false);
var cluster_items = [];
for (var i in run_data["input"]["accession"]) {
if (included_accessions.indexOf(parseInt(i)) > -1) {
var accId = run_data["input"]["accession"][i]["id"];
cluster_items.push({
"id": accId,
"features": input_data["accession"][i]["label"]
});
acc_labels[accId] = input_data["accession"][i]["label"];
}
}
var cluster_fck = clusterfck.hcluster(cluster_items, function(f1, f2){
var label1 = f1["features"].toLowerCase();
var label2 = f2["features"].toLowerCase();
var word_length = Math.max(label1.length, label2.length);
var matches = 0;
for (var i=0; i < word_length; i++) {
if (label1[i] !== label2[i]) {
break;
}
matches++;
}
return (word_length - matches) / word_length;
}, clusterfck.COMPLETE_LINKAGE);
result["data"]["nodes"] = clusterfck_tree_to_inchlib_nodes(cluster_fck[0]);
break;
default:
$("#abpres_heatmap-row_showtop").val("all");
$("#abpres_heatmap-row_showtop").attr("disabled", true);
var newick_idx = parseInt($("#abpres_heatmap-row_clustering").val().split("newick_").pop());
var acc_tree = parser.parse_newick(input_data["accession_newick"][newick_idx]["newick"]);
for (var i in run_data["input"]["accession"]) {
acc_labels[run_data["input"]["accession"][i]["id"]] = input_data["accession"][i]["label"];
}
......@@ -245,49 +378,23 @@
if ((acc_tree["children"][0]["name"] === undefined) || (acc_tree["children"][0]["name"].length < 1)) { // hack-ish catch
acc_tree["children"].shift();
}
for (var i in run_data["input"]["accession"]) {
var accId = run_data["input"]["accession"][i]["id"];
if (incorporated_accession.indexOf(accId) < 0) {
acc_tree["children"].push({ name: accId, branch_length: 0 });
}
result["data"]["nodes"] = biojs_newick_tree_to_inchlib_nodes(acc_tree);
break;
}
result["data"]["nodes"] = tree_to_nodes(acc_tree);
// build column (family) dendrogram
var fam_tree = parser.parse_newick(network_data["families_newick"]);
var incorporated_family = get_all_labels_in_tree(fam_tree);
if ((fam_tree["children"][0]["name"] === undefined) || (fam_tree["children"][0]["name"].length < 1)) { // hack-ish catch
fam_tree["children"].shift();
}
for (var i in family_data) {
var famId = family_data[i]["label"];
if (incorporated_family.indexOf(famId) < 0) {
fam_tree["children"].push({ name: famId, branch_length: 0 });
}
}
console.log(fam_tree);
result["column_dendrogram"]["nodes"] = tree_to_nodes(fam_tree);
// fill features & update label
var acc_features = {};
for (var i in run_data["input"]["accession"]) {
var accId = run_data["input"]["accession"][i]["id"];
acc_features[accId] = [];
for (var j = 0; j < family_data.length; j++) {
acc_features[accId].push(0);
}
}
for (var i in family_data) {
result["data"]["feature_names"].push(family_data[i]["label"]);
for (var j in family_data[i]["members"]) {
var bgcId = family_data[i]["members"][j];
var accId = run_data["input"]["accession"][run_data["input"]["bgc"][bgcId]["acc"]]["id"];
acc_features[accId][i] += 1;
}
}
result["column_dendrogram"]["nodes"] = biojs_newick_tree_to_inchlib_nodes(fam_tree);
// update row labels
for (var i in result["data"]["nodes"]) {
var node = result["data"]["nodes"][i];
if (node.hasOwnProperty("objects")) {
var accId = node["objects"][0];
node["objects"][0] = acc_labels[accId];
node["objects"][0] = acc_labels[accId].substring(0, 30);
node["features"] = acc_features[accId];
}
}
......@@ -309,7 +416,88 @@
return accession_list;
}
function tree_to_nodes(node, parentNode = -1, nodes = {}, leaf_count = 0, node_count = 0, branch_parent_length = 0, branch_total_length = 0) {
/**
 * Convert a clusterfck tree into a flat InCHlib-style node dictionary.
 *
 * Callers pass only `node` (the root cluster); all other parameters are
 * internal state threaded through the recursion.
 *
 * Each cluster with size > 1 becomes "node_<n>" ({count, distance, parent?,
 * left_child, right_child}); every other cluster becomes "leaf_<n>"
 * ({count: 1, distance, features: [], objects: [canonical.id], parent?}).
 * While recursing, `distance` holds the cumulative `dist` from the root; at
 * the root call it is flipped to (longest cumulative length - distance).
 *
 * @returns {Object} the node dictionary for the root call; recursive calls
 *          return [nodes, leaf_count, node_count, branch_total_length].
 */
function clusterfck_tree_to_inchlib_nodes(node, parentNode = -1, nodes = {}, leaf_count = 0, node_count = 0, branch_parent_length = 0, branch_total_length = 0, isLeftChild = true) {
    // cumulative branch length from the root down to this cluster
    var branchLength = branch_parent_length;
    if (node.hasOwnProperty("dist")) {
        branchLength += node["dist"];
    }
    if (branchLength > branch_total_length) {
        branch_total_length = branchLength;
    }

    var isInternal = node["size"] > 1;
    var selfId;
    if (isInternal) {
        node_count++;
        selfId = "node_" + node_count;
        nodes[selfId] = {
            "count": node["size"],
            "distance": branchLength,
        };
    } else {
        leaf_count++;
        selfId = "leaf_" + leaf_count;
        nodes[selfId] = {
            "count": 1,
            "distance": branchLength,
            "features": [],
            "objects": [node["canonical"]["id"]],
        };
    }

    // Only non-root entries get a parent. (The leaf branch used to test the
    // freshly created entry's own "parent" field, which is always undefined,
    // so a root leaf ended up with parent === -1.)
    if (parentNode !== -1) {
        nodes[selfId]["parent"] = parentNode;
    }
    if (nodes.hasOwnProperty(parentNode)) {
        if (isLeftChild) {
            nodes[parentNode]["left_child"] = selfId;
        } else {
            nodes[parentNode]["right_child"] = selfId;
        }
    }

    if (isInternal) {
        // left subtree first, then right, carrying the running counters along
        var sides = [["left", true], ["right", false]];
        for (var s = 0; s < sides.length; s++) {
            var recresult = clusterfck_tree_to_inchlib_nodes(node[sides[s][0]], selfId, nodes, leaf_count, node_count, branchLength, branch_total_length, sides[s][1]);
            nodes = recresult[0];
            leaf_count = recresult[1];
            node_count = recresult[2];
            if (recresult[3] > branch_total_length) {
                branch_total_length = recresult[3];
            }
        }
    }

    if (parentNode == -1) {
        // root call: flip cumulative lengths relative to the longest branch
        for (var nodeId in nodes) {
            nodes[nodeId]["distance"] = branch_total_length - nodes[nodeId]["distance"];
        }
        return nodes;
    } else {
        // recursive return
        return [nodes, leaf_count, node_count, branch_total_length];
    }
}
// convert biojs-io-newick tree to inchlib nodes
function biojs_newick_tree_to_inchlib_nodes(node, parentNode = -1, nodes = {}, leaf_count = 0, node_count = 0, branch_parent_length = 0, branch_total_length = 0) {
if (node.hasOwnProperty("children")) { // is a node
node_count++;
var node_count_pre = node_count;
......@@ -336,7 +524,7 @@
var pId = (i < node["children"].length - 2?(node_count_pre + i):(node_count_pre + node["children"].length - 2));
var parentChild = "node_" + pId;
var branchLength = branch_parent_length + (node.hasOwnProperty("branch_length")?node["branch_length"]:0.00);
var recresult = tree_to_nodes(node["children"][i], parentChild, nodes, leaf_count, node_count, branchLength, 0);
var recresult = biojs_newick_tree_to_inchlib_nodes(node["children"][i], parentChild, nodes, leaf_count, node_count, branchLength, 0);
nodes = recresult[0];
if (nodes[parentChild].hasOwnProperty("parent")) {
nodes[nodes[parentChild]["parent"]]["count"] += nodes[parentChild]["count"];
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment