Commit a8050f00 authored by Nijveen
Browse files

Changed the correlation function to calculate correlations on the fly

parent 0b34ce14
import os
from os import path
from django.shortcuts import render, render_to_response, HttpResponse
from django.shortcuts import render_to_response, HttpResponse
from django.conf import settings
import suggestions
import main
from main.views import no_results
from main.models import Experiment, GeneInfo, Species
import numpy
import re
import json
import numpy as np
from scipy import stats
# Create your views here.
......@@ -54,7 +53,7 @@ def correlation(request):
if ret:
return ret
else:
return main.views.no_results(gene_id, "correlation")
return no_results(gene_id, "correlation")
geneInfo = geneInfoList[0]
......@@ -75,10 +74,7 @@ def correlation(request):
except ValueError:
return HttpResponse('<h1> invalid correlation threshold </h1>')
# lodscores = loadLodFile(exp_name)
# correlations = calculateCorrelation(gene_name.upper(),lodscores)
correlations = loadPearsonFile(queryGene["id"].upper(), exp_name)
genes = getCorrelatingGenes(correlations, corrthld)
genes = getCorrelatingGenes(queryGene["id"].upper(), exp_name, corrthld)
genelist = list()
for gene in genes:
......@@ -104,86 +100,40 @@ def correlation(request):
return render_to_response('correlation.html', {'experiments': experiments})
def getCorrelatingGenes(correlations, corrthld):
    """Filter a gene -> correlation mapping by absolute value.

    :param correlations: dict mapping a gene id to its correlation value.
    :param corrthld: threshold; an entry is kept when the absolute value
        of its correlation is greater than or equal to this number.
    :return: a new dict holding only the entries that pass the threshold.
    """
    return {
        name: score
        for name, score in correlations.items()
        if abs(score) >= corrthld
    }
def loadLodFile(experiment):
    """Read the LOD score table of an experiment into a dict.

    The ``Experiment`` whose ``experiment_name`` equals ``experiment`` is
    looked up and its ``lod_file`` is opened relative to
    ``settings.MEDIA_ROOT``. The first line is treated as a header and
    discarded; every other line is split on tabs.

    :param experiment: name of the experiment to load.
    :return: dict mapping the upper-cased first column of each row to the
        list of remaining columns (still as strings).
    """
    lod_path = Experiment.objects.get(experiment_name=experiment).lod_file
    lod_file_path = os.path.join(settings.MEDIA_ROOT, lod_path)
    lodscores = dict()
    with open(lod_file_path) as lod_file:
        lod_file.readline()  # discard the header row
        for line in lod_file:
            fields = line.strip().split('\t')
            geneID = fields[0].upper()
            if not geneID:
                # Blank line: nothing to store.
                continue
            # Key by the upper-cased id so lookups with an upper-cased
            # gene name find the row.
            lodscores[geneID] = fields[1:]
    return lodscores
def calculateCorrelation(gene, lodscores):
    """Correlate the absolute LOD profile of one gene against all genes.

    :param gene: key in ``lodscores`` of the reference gene.
    :param lodscores: dict mapping a gene id to a sequence of values that
        ``float()`` can convert.
    :return: dict mapping every key of ``lodscores`` to the
        ``numpy.corrcoef`` coefficient between its absolute values and
        the absolute values of the reference gene.
    """
    def _magnitudes(raw):
        # Only the magnitude of each score takes part in the correlation.
        return [abs(float(value)) for value in raw]

    reference = _magnitudes(lodscores[gene])
    return {
        other: numpy.corrcoef(reference, _magnitudes(scores))[0, 1]
        for other, scores in lodscores.items()
    }
def loadPearsonFile(gene, experiment):
    """Read one gene's row from an experiment's pre-computed Pearson table.

    ``pearson.csv`` and ``pearson.json`` are expected in the same
    directory as the experiment's ``lod_file`` (the last path component
    of ``lod_file`` is replaced). The JSON file maps a gene id to the
    file offset of that gene's row in the CSV, so only the header line
    and the requested row are read.

    :param gene: gene id to look up in the JSON index.
    :param experiment: ``experiment_name`` of the ``Experiment`` to use.
    :return: dict mapping each header column name (the first column is
        skipped) to the float in the same column of the gene's row. Empty
        when ``gene`` is not in the index. Cells that cannot be parsed as
        float are printed and left out.
    """
    lod_path = Experiment.objects.get(experiment_name=experiment).lod_file
    pearson_path = re.sub(r'/[^/]*$', "/pearson.csv", lod_path)
    index_path = re.sub(r'/[^/]*$', "/pearson.json", lod_path)
    pearson_file_path = os.path.join(settings.MEDIA_ROOT, pearson_path)
    index_json_path = os.path.join(settings.MEDIA_ROOT, index_path)

    with open(index_json_path) as index_file:
        geneIndex = json.load(index_file)
    if gene not in geneIndex:
        return dict()

    with open(pearson_file_path) as pearson_file:
        genes = pearson_file.readline().strip().split(",")
        # Jump straight to the row of the requested gene.
        pearson_file.seek(geneIndex[gene], 0)
        fields = pearson_file.readline().strip().split(",")

    correlations = dict()
    # Stop at the shorter of header and row so that a truncated row
    # yields a partial result instead of an IndexError.
    for g in range(1, min(len(genes), len(fields))):
        try:
            correlations[genes[g]] = float(fields[g])
        except ValueError:
            # Same output as before, written so it parses on Python 2 and 3.
            print("%s %s %s" % (fields[g], g, genes[g]))
    return correlations
def createPearsonFileIndex(experiment):
    """Write the JSON offset index for an experiment's ``pearson.csv``.

    ``pearson.csv`` is read from the directory of the experiment's
    ``lod_file``; the first line is treated as a header. For every
    following line the file position at which the line starts is stored
    under the upper-cased text before the first comma. The resulting dict
    is dumped to ``pearson.json`` in the same directory.

    :param experiment: ``experiment_name`` of the ``Experiment`` to index.
    """
    lod_path = Experiment.objects.get(experiment_name=experiment).lod_file
    pearson_path = re.sub(r'/[^/]*$', "/pearson.csv", lod_path)
    index_path = re.sub(r'/[^/]*$', "/pearson.json", lod_path)
    pearson_file_path = os.path.join(settings.MEDIA_ROOT, pearson_path)
    index_json_path = os.path.join(settings.MEDIA_ROOT, index_path)

    geneIndex = dict()
    with open(pearson_file_path) as pearson_file:
        pearson_file.readline()  # skip the header row
        # readline() is used rather than iterating the file object, so
        # that tell() reports the start of the line about to be read.
        while True:
            filePos = pearson_file.tell()
            line = pearson_file.readline()
            if not line:
                break
            # Strip so blank lines and rows without a comma do not end up
            # as keys that contain the trailing newline.
            geneID = line.split(',', 1)[0].strip()
            if geneID:
                geneIndex[geneID.upper()] = filePos

    with open(index_json_path, 'w') as index_file:
        json.dump(geneIndex, index_file)
def _loadLodMatrix(exp_name):
    """Return ``(data, genes)`` for an experiment, caching them as ``.npy``.

    ``data`` is a 2-D float array with one row per gene and ``genes`` is
    the array of upper-cased gene ids in the same row order. Both are
    loaded from ``lod.npy`` / ``genes.npy`` under
    ``MEDIA_ROOT/data/<exp_name>/`` when both files exist; otherwise they
    are parsed from the experiment's LOD file and saved there.
    """
    lod_pickle_path = path.join(settings.MEDIA_ROOT, 'data/%s/%s' % (exp_name, "lod.npy"))
    genes_pickle_path = path.join(settings.MEDIA_ROOT, 'data/%s/%s' % (exp_name, "genes.npy"))
    # Both cache files are needed; rebuild if either one is missing.
    if path.exists(lod_pickle_path) and path.exists(genes_pickle_path):
        return np.load(lod_pickle_path), np.load(genes_pickle_path)

    lod_file = Experiment.objects.get(experiment_name=exp_name).lod_file
    lod_file = lod_file.split("/")[-1]
    lod_file_path = path.join(settings.MEDIA_ROOT, 'data/%s/%s' % (exp_name, lod_file))
    gene_ids = []
    rows = []
    with open(lod_file_path) as lodfile:
        # NOTE(review): assumes the header has one label per column,
        # including the gene-id column - confirm against the file format.
        ncol = len(lodfile.readline().split())
        for line in lodfile:
            fields = line.split()
            if not fields:
                # Blank line (for example a trailing newline at EOF).
                continue
            gene_ids.append(fields[0].upper())
            rows.append(fields[1:])
    genes = np.asarray(gene_ids)
    # reshape() raises ValueError when the rows do not match the header width.
    data = np.asarray(rows).reshape(len(rows), ncol - 1).astype(float)
    np.save(lod_pickle_path, data)
    np.save(genes_pickle_path, genes)
    return data, genes


def getCorrelatingGenes(gene, exp_name, corrthld):
    """Return the genes whose LOD profile correlates with that of ``gene``.

    The Pearson correlation between the row of ``gene`` and every row of
    the experiment's LOD matrix is computed on the fly.

    :param gene: upper-cased gene id to correlate against.
    :param exp_name: experiment name; selects the data directory and the
        ``Experiment`` record.
    :param corrthld: a gene is reported when the absolute value of its
        correlation is greater than or equal to this threshold.
    :return: dict mapping gene id to correlation coefficient. Empty when
        ``gene`` is not part of the experiment.
    """
    data, genes = _loadLodMatrix(exp_name)
    try:
        gene_index = genes.tolist().index(gene)
    except ValueError:
        # Unknown gene: report no correlating genes rather than failing.
        return dict()

    # Centre every row, then r = (x . y) / (|x| * |y|).
    datam = data - data.mean(axis=1)[:, None]
    datass = np.sqrt(np.sum(datam ** 2, axis=1))
    # Constant rows have zero norm and give nan; nan never passes the
    # threshold test, so the floating point warnings are silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        rs = np.dot(datam, datam[gene_index]) / (datass * datass[gene_index])
        selected = np.abs(rs) >= corrthld
    return dict(zip(genes[selected], rs[selected]))
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment