views.py 6.04 KB
Newer Older
Nijveen, Harm's avatar
Nijveen, Harm committed
1
2
import os

Nijveen, Harm's avatar
Nijveen, Harm committed
3
from django.shortcuts import render, render_to_response, HttpResponse
Nijveen, Harm's avatar
Nijveen, Harm committed
4
5
from django.conf import settings

6
from main.models import Experiment, GeneInfo, Species
Nijveen, Harm's avatar
Nijveen, Harm committed
7
8
9
10
11

import numpy
import re
import json

Nijveen, Harm's avatar
Nijveen, Harm committed
12

Nijveen, Harm's avatar
Nijveen, Harm committed
13
14
15
16
17
18
19
20
# Create your views here.

class GeneInfoCorrelation:
    '''
    Plain record describing one gene and its correlation value.

    The attributes are class-level defaults; correlation() creates an
    instance per correlating gene and overrides the values it knows.
    '''
    # Assigned by correlation() on every instance; declared here so that the
    # attribute always exists, like the other fields.
    gene_id = ""
    transcript_name = ""
    description = ""
    gene_name = ""
    # Correlation coefficient with the query gene.
    correlation = 0

Nijveen, Harm's avatar
Nijveen, Harm committed
21

Nijveen, Harm's avatar
Nijveen, Harm committed
22
23
24
25
26
def correlation(request):
    '''
    Select a gene and an experiment, and list the genes that correlate with it.

    Query parameters (read from request.GET):
      experiment_name -- name of the experiment whose Pearson file is used
      gene_id         -- ID of the query gene
      corrthld        -- optional correlation threshold, default 0.9; values
                         outside [-1, 1] are clamped to that range

    When experiment_name or gene_id is missing, or the request is not a GET,
    the selection form is rendered with only the list of experiments.
    Unknown genes, unknown experiments and unparsable thresholds produce a
    short HTML error message instead of the result page.

    :param request: the Django request object.
    :return: an HttpResponse.
    '''
    # Local import so this view does not rely on the module's import block.
    from django.utils.html import escape

    species_name = "Arabidopsis thaliana"
    experiments = Experiment.objects.filter(
        species__species_name=species_name).values_list('experiment_name', flat=True)
    species_short_name = Species.objects.get(species_name=species_name).short_name

    exp_name = request.GET.get('experiment_name')
    gene_id = request.GET.get('gene_id')

    # Guard clause: without a complete GET query there is nothing to compute.
    # This also covers non-GET requests, which previously fell through and
    # returned None.
    if request.method != 'GET' or not (exp_name and gene_id):
        return render_to_response('correlation.html', {'experiments': experiments})

    # A single LIMIT-1 query instead of a COUNT followed by a fetch.
    matches = list(GeneInfo.objects.filter(gene_id=gene_id,
                                           species__species_name=species_name)[:1])
    if not matches:
        # gene_id comes straight from the query string: escape it before
        # embedding it in HTML.
        return HttpResponse('<h1> Unknown gene %s </h1>' % escape(gene_id))
    geneInfo = matches[0]

    queryGene = {
        "id": geneInfo.gene_id,
        "name": geneInfo.gene_name,
        "chromosome": geneInfo.chr,
        "start": geneInfo.start,
    }

    corrthld = 0.9
    if request.GET.get('corrthld'):
        try:
            corrthld = float(request.GET.get('corrthld'))
        except ValueError:
            return HttpResponse('<h1> invalid correlation threshold </h1>')
        # Clamp to the valid range of a correlation coefficient.
        if corrthld > 1:
            corrthld = 1
        elif corrthld < -1:
            corrthld = -1

    # Correlations are read from the precomputed Pearson file rather than
    # being calculated from the LOD scores on every request.
    try:
        correlations = loadPearsonFile(queryGene["id"].upper(), exp_name)
    except Experiment.DoesNotExist:
        return HttpResponse('<h1> Unknown experiment %s </h1>' % escape(exp_name))
    genes = getCorrelatingGenes(correlations, corrthld)

    genelist = list()
    for gene, coefficient in genes.items():
        g = GeneInfoCorrelation()
        g.gene_id = gene
        g.correlation = coefficient
        gi = GeneInfo.objects.filter(gene_id=gene)
        if gi:
            g.description = gi[0].description
            g.gene_name = gi[0].gene_name
        genelist.append(g)

    # Strongest positive correlation first.
    genelist.sort(key=lambda x: x.correlation, reverse=True)

    return render_to_response('correlation.html', {'experiment_name': exp_name,
                                                   'experiments': experiments,
                                                   'species': species_short_name,
                                                   'queryGene': queryGene,
                                                   'gene_list': genelist,
                                                   'corrthld': corrthld})
Nijveen, Harm's avatar
Nijveen, Harm committed
84
85


Nijveen, Harm's avatar
Nijveen, Harm committed
86
87
88
89
90
91
92
93
def getCorrelatingGenes(correlations, corrthld):
    '''
    Keep the genes whose correlation magnitude reaches the threshold.

    :param correlations: dict mapping gene ID to correlation coefficient.
    :param corrthld: threshold compared against the absolute coefficient.
    :return: new dict with the entries for which abs(coefficient) >= corrthld;
             the original (signed) coefficients are kept as values.
    '''
    return dict(
        (name, coefficient)
        for name, coefficient in correlations.items()
        if abs(coefficient) >= corrthld
    )

Nijveen, Harm's avatar
Nijveen, Harm committed
94
95

def loadLodFile(experiment):
    '''
    Read the LOD score file of an experiment into a dictionary.

    The file location is the experiment's lod_file value joined onto
    settings.MEDIA_ROOT. The first line is treated as a header and skipped.
    Every other line is split on tabs: the first field is the gene ID, the
    remaining fields are its scores.

    :param experiment: experiment_name of the Experiment to load.
    :return: dict mapping the upper-cased gene ID to the list of score
             fields, kept as the strings read from the file.
    '''
    lod_path = Experiment.objects.get(experiment_name=experiment).lod_file
    lod_file_path = os.path.join(settings.MEDIA_ROOT, lod_path)

    lodscores = dict()
    with open(lod_file_path) as lod_file:
        lod_file.readline()  # skip the header line

        # Iterate lazily instead of materialising every line with readlines().
        for line in lod_file:
            fields = line.strip().split('\t')
            geneID = fields[0].upper()
            if not geneID:
                # Blank line: nothing to store.
                continue
            # Key on the upper-cased ID so lookups with an upper-cased gene
            # name (as the other loaders in this module use) succeed.
            lodscores[geneID] = fields[1:]

    return lodscores


def calculateCorrelation(gene, lodscores):
    '''
    Correlate one gene's scores with the scores of every other gene.

    :param gene: key in lodscores of the gene to compare against.
    :param lodscores: dict mapping gene ID to a sequence of scores. The
                      scores may be numbers or numeric strings (loadLodFile
                      returns strings); they are converted to float here.
    :return: dict mapping every other gene ID to the correlation coefficient
             (numpy.corrcoef) between its scores and those of gene.
    :raises KeyError: if gene is not a key of lodscores.
    :raises ValueError: if a score cannot be converted to float.
    '''
    # Convert once up front; corrcoef cannot work on lists of strings.
    geneLodscores = numpy.asarray(lodscores[gene], dtype=float)
    correlations = dict()
    for s in lodscores:
        if s == gene:
            # The query gene itself is not part of the result.
            continue
        otherLodscores = numpy.asarray(lodscores[s], dtype=float)
        correlations[s] = numpy.corrcoef(geneLodscores, otherLodscores)[0, 1]
    return correlations


def loadPearsonFile(gene, experiment):
    '''
    Read the precomputed correlations of one gene from the Pearson file.

    The files pearson.csv and pearson.json are expected in the same directory
    as the experiment's LOD file, below settings.MEDIA_ROOT. pearson.json
    (written by createPearsonFileIndex) maps a gene ID to the file offset of
    that gene's row in pearson.csv, so only the header and one row are read.

    :param gene: gene ID to look up; must match the index keys exactly
                 (createPearsonFileIndex stores them upper-cased).
    :param experiment: experiment_name of the Experiment to use.
    :return: dict mapping each gene ID from the CSV header to its correlation
             with gene as a float. Empty when gene is not in the index.
             Values that are not numeric are skipped and logged.
    '''
    # Local import so this function does not rely on the module's import block.
    import logging
    logger = logging.getLogger(__name__)

    lod_path = Experiment.objects.get(experiment_name=experiment).lod_file
    # Swap the file name at the end of the LOD path for the Pearson file names.
    pearson_path = re.sub(r'/[^/]*$', "/pearson.csv", lod_path)
    index_path = re.sub(r'/[^/]*$', "/pearson.json", lod_path)
    pearson_file_path = os.path.join(settings.MEDIA_ROOT, pearson_path)
    index_json_path = os.path.join(settings.MEDIA_ROOT, index_path)

    correlations = dict()

    with open(index_json_path) as index_file:
        geneIndex = json.load(index_file)

    if gene not in geneIndex:
        return correlations

    with open(pearson_file_path) as pearson_file:
        header = pearson_file.readline().strip()
        # Jump straight to the row of the requested gene.
        pearson_file.seek(geneIndex[gene], 0)
        line = pearson_file.readline().strip()

    genes = header.split(",")
    fields = line.split(",")
    if len(fields) != len(genes):
        logger.warning("Row for %s in %s has %d fields, header has %d",
                       gene, pearson_file_path, len(fields), len(genes))

    # Column 0 holds the row label, so pairing starts at column 1. zip() stops
    # at the shorter sequence, so a truncated row cannot raise IndexError.
    for column, (name, value) in enumerate(zip(genes[1:], fields[1:]), 1):
        try:
            correlations[name] = float(value)
        except ValueError:
            logger.warning("Skipping non-numeric correlation %r in column %d (%s)",
                           value, column, name)

    return correlations

Nijveen, Harm's avatar
Nijveen, Harm committed
150
151

def createPearsonFileIndex(experiment):
    '''
    Build the offset index that loadPearsonFile uses to find a gene's row.

    pearson.csv is read from the directory of the experiment's LOD file
    (below settings.MEDIA_ROOT). For every row after the header, the file
    offset at which the row starts is stored under the upper-cased gene ID
    taken from the first column. The result is written as JSON to
    pearson.json in the same directory, replacing any existing file.

    :param experiment: experiment_name of the Experiment to index.
    :return: None.
    '''
    lod_path = Experiment.objects.get(experiment_name=experiment).lod_file
    # Swap the file name at the end of the LOD path for the Pearson file names.
    pearson_path = re.sub(r'/[^/]*$', "/pearson.csv", lod_path)
    index_path = re.sub(r'/[^/]*$', "/pearson.json", lod_path)
    pearson_file_path = os.path.join(settings.MEDIA_ROOT, pearson_path)
    index_json_path = os.path.join(settings.MEDIA_ROOT, index_path)

    geneIndex = dict()
    with open(pearson_file_path) as pearson_file:
        pearson_file.readline()  # skip the header row
        while True:
            # The offset must be taken before the row is read.
            filePos = pearson_file.tell()
            line = pearson_file.readline()
            if not line:
                break  # end of file
            # Strip so that blank lines and rows without a comma do not end up
            # indexed under a key that contains the line terminator.
            geneID = line.split(',', 1)[0].strip()
            if geneID:
                geneIndex[geneID.upper()] = filePos

    with open(index_json_path, 'w') as index_file:
        json.dump(geneIndex, index_file)