Skip to content
Snippets Groups Projects
Commit 957a0540 authored by Jorge Navarro Muñoz's avatar Jorge Navarro Muñoz
Browse files

Merge branch 'master' into 'lcs'

(Cherry-picked some changes; further polishing will follow.)
# Conflicts:
#   Installation Guide.md
#   bigscape.py
parents deba3829 6f8ef97d
Branches lcs
No related tags found
No related merge requests found
......@@ -8,6 +8,12 @@
# heavily modified by Jorge Navarro 2016 #
######################################################################
# Makes sure the script can be used with Python 2 as well as Python 3.
from __future__ import print_function, division
from sys import version_info
if version_info[0]==2:
range = xrange
import os
import sys
import argparse
......@@ -169,15 +175,10 @@ def draw_arrow(additional_tabs, X, Y, L, l, H, h, strand, color, color_contour,
else:
color_contour = [50, 50, 50]
arrow += additional_tabs + "\t\t<polygon "
arrow += "class=\"" + gid + "\" "
arrow += "points=\"" + " ".join(points_coords) + "\" "
arrow += "fill=\"rgb(" + ",".join(map(str,color)) +")\" "
arrow += "fill-opacity=\"1.0\" "
arrow += "stroke=\"rgb(" + ",".join(map(str,color_contour)) + ")\" "
arrow += "stroke-width=\"" + str(gene_contour_thickness) + "\" "
arrow += category + " />\n"
arrow += "{}\t\t<polygon class=\"{}\" ".format(additional_tabs, gid)
arrow += "points=\"{}\" fill=\"rgb({})\" ".format(" ".join(points_coords), ",".join([str(val) for val in color]))
arrow += "fill-opacity=\"1.0\" stroke=\"rgb({})\" ".format(",".join([str(val) for val in color_contour]))
arrow += "stroke-width=\"{}\" {} />\n".format(str(gene_contour_thickness), category)
# paint domains. Domains on the tip of the arrow should not have corners sticking
# out of them
......@@ -193,8 +194,7 @@ def draw_arrow(additional_tabs, X, Y, L, l, H, h, strand, color, color_contour,
dccolour = domain[6]
arrow += additional_tabs + "\t\t<g>\n"
arrow += additional_tabs + "\t\t\t<title>" + dname + " (" + dacc + ")\n\"" + ddesc + "\"</title>\n"
arrow += "{}\t\t\t<title>{} ({})\n\"{}\"</title>\n".format(additional_tabs, dname, dacc, ddesc)
if strand == "+":
# calculate how far from head_start we (the horizontal guide at y=Y+internal_domain_margin)
......@@ -211,16 +211,11 @@ def draw_arrow(additional_tabs, X, Y, L, l, H, h, strand, color, color_contour,
x_margin_offset = internal_domain_margin/sin(pi - atan2(h+H/2.0,-head_length))
if (dX + dL) < head_start + collision_x - x_margin_offset:
arrow += additional_tabs + "\t\t\t<rect class=\"" + dacc + "\" "
arrow += "x=\"" + str(X+dX) + "\" "
arrow += "y=\"" + str(Y + internal_domain_margin) + "\" "
arrow += "stroke-linejoin=\"round\" "
arrow += "width=\"" + str(dL) + "\" "
arrow += "height=\"" + str(dH) + "\" "
arrow += "fill=\"rgb(" + ",".join(map(str,dcolor)) + ")\" "
arrow += "stroke=\"rgb(" + ",".join(map(str,dccolour)) + ")\" "
arrow += "stroke-width=\"" + str(domain_contour_thickness) + "\" "
arrow += "opacity=\"0.75\" />\n"
arrow += "{}\t\t\t<rect class=\"{}\" x=\"{}\" ".format(additional_tabs, dacc, str(X+dX))
arrow += "y=\"{}\" stroke-linejoin=\"round\" ".format(str(Y + internal_domain_margin))
arrow += "width=\"{}\" height=\"{}\" ".format(str(dL), str(dH))
arrow += "fill=\"rgb({})\" stroke=\"rgb({})\" ".format(",".join([str(val) for val in dcolor]), ",".join([str(val) for val in dccolour]))
arrow += "stroke-width=\"{}\" opacity=\"0.75\" />\n".format(str(domain_contour_thickness))
else:
del points[:]
......@@ -263,15 +258,12 @@ def draw_arrow(additional_tabs, X, Y, L, l, H, h, strand, color, color_contour,
for point in points:
points_coords.append(str(int(point[0])) + "," + str(int(point[1])))
arrow += additional_tabs + "\t\t\t<polygon class=\"" + dacc + "\" "
arrow += "points=\"" + " ".join(points_coords) + "\" "
arrow += "stroke-linejoin=\"round\" "
arrow += "width=\"" + str(dL) + "\" "
arrow += "height=\"" + str(dH) + "\" "
arrow += "fill=\"rgb(" + ",".join(map(str,dcolor)) + ")\" "
arrow += "stroke=\"rgb(" + ",".join(map(str,dccolour)) + ")\" "
arrow += "stroke-width=\"" + str(domain_contour_thickness) + "\" "
arrow += "opacity=\"0.75\" />\n"
arrow += "{}\t\t\t<polygon class=\"{}\" ".format(additional_tabs, dacc)
arrow += "points=\"{}\" stroke-linejoin=\"round\" ".format(" ".join(points_coords))
arrow += "width=\"{}\" height=\"{}\" ".format(str(dL), str(dH))
arrow += "fill=\"rgb({})\" ".format(",".join([str(val) for val in dcolor]))
arrow += "stroke=\"rgb({})\" ".format(",".join([str(val) for val in dccolour]))
arrow += "stroke-width=\"{}\" opacity=\"0.75\" />\n".format(str(domain_contour_thickness))
# now check other direction
else:
......@@ -286,16 +278,12 @@ def draw_arrow(additional_tabs, X, Y, L, l, H, h, strand, color, color_contour,
# nice, blocky domains
if dX > collision_x + x_margin_offset:
arrow += additional_tabs + "\t\t\t<rect class=\"" + dacc + "\" "
arrow += "x=\"" + str(X+dX) + "\" "
arrow += "y=\"" + str(Y + internal_domain_margin) + "\" "
arrow += "stroke-linejoin=\"round\" "
arrow += "width=\"" + str(dL) + "\" "
arrow += "height=\"" + str(dH) + "\" "
arrow += "fill=\"rgb(" + ",".join(map(str,dcolor)) + ")\" "
arrow += "stroke=\"rgb(" + ",".join(map(str,dccolour)) + ")\" "
arrow += "stroke-width=\"" + str(domain_contour_thickness) + "\" "
arrow += "opacity=\"0.75\" />\n"
arrow += "{}\t\t\t<rect class=\"{}\" ".format(additional_tabs, dacc)
arrow += "x=\"{}\" y=\"{}\" ".format(str(X+dX), str(Y + internal_domain_margin))
arrow += "stroke-linejoin=\"round\" width=\"{}\" height=\"{}\" ".format(str(dL), str(dH))
arrow += "fill=\"rgb({})\" ".format(",".join([str(val) for val in dcolor]))
arrow += "stroke=\"rgb({})\" ".format(",".join([str(val) for val in dccolour]))
arrow += "stroke-width=\"{}\" opacity=\"0.75\" />\n".format(str(domain_contour_thickness))
else:
del points[:]
......@@ -329,15 +317,12 @@ def draw_arrow(additional_tabs, X, Y, L, l, H, h, strand, color, color_contour,
for point in points:
points_coords.append(str(int(point[0])) + "," + str(int(point[1])))
arrow += additional_tabs + "\t\t\t<polygon class=\"" + dacc + "\" "
arrow += "points=\"" + " ".join(points_coords) + "\" "
arrow += "stroke-linejoin=\"round\" "
arrow += "width=\"" + str(dL) + "\" "
arrow += "height=\"" + str(dH) + "\" "
arrow += "fill=\"rgb(" + ",".join(map(str,dcolor)) + ")\" "
arrow += "stroke=\"rgb(" + ",".join(map(str,dccolour)) + ")\" "
arrow += "stroke-width=\"" + str(domain_contour_thickness) + "\" "
arrow += "opacity=\"0.75\" />\n"
arrow += "{}\t\t\t<polygon class=\"{}\" ".format(additional_tabs, dacc)
arrow += "points=\"{}\" stroke-linejoin=\"round\" ".format(" ".join(points_coords))
arrow += "width=\"{}\" height=\"{}\" ".format(str(dL), str(dH))
arrow += "fill=\"rgb({})\" ".format(",".join([str(val) for val in dcolor]))
arrow += "stroke=\"rgb({})\" ".format(",".join([str(val) for val in dccolour]))
arrow += "stroke-width=\"{}\" opacity=\"0.75\" />\n".format(str(domain_contour_thickness))
arrow += additional_tabs + "\t\t</g>\n"
......@@ -351,7 +336,7 @@ def draw_line(X,Y,L):
Draw a line below genes
"""
line = "<line x1=\"" + str(X) + "\" y1=\"" + str(Y) + "\" x2=\"" + str(X+L) + "\" y2=\"" + str(Y) + "\" style=\"stroke:rgb(50,50,50); stroke-width:" + str(stripe_thickness) + " \"/>\n"
line = "<line x1=\"{}\" y1=\"{}\" x2=\"{}\" y2=\"{}\" style=\"stroke:rgb(50,50,50); stroke-width:{} \"/>\n".format(str(X), str(Y), str(X+L), str(Y), str(stripe_thickness))
return line
......@@ -426,7 +411,7 @@ def SVG(write_html, outputfile, GenBankFile, pfdFile, use_pfd, color_genes, colo
header = "\t\t<div title=\"" + GenBankFile[:-4] + "\">\n"
additional_tabs = "\t\t\t"
header += additional_tabs + "<svg width=\"" + str(max_width + 2*(mX)) + "\" height=\"" + str(loci*(2*h + H + 2*mY)) + "\">\n"
header += "{}<svg width=\"{}\" height=\"{}\">\n".format(additional_tabs, str(max_width + 2*(mX)), str(loci*(2*h + H + 2*mY)))
addY = loci*(2*h + H + 2*mY)
else:
......@@ -654,7 +639,7 @@ def SVG(write_html, outputfile, GenBankFile, pfdFile, use_pfd, color_genes, colo
with open(gene_color_file, "a") as color_genes_handle:
for new_names in new_color_genes:
color_genes_handle.write(new_names + "\t" + ",".join(map(str,new_color_genes[new_names])) + "\n")
color_genes_handle.write(new_names + "\t" + ",".join([str(ncg) for ncg in new_color_genes[new_names]]) + "\n")
if len(new_color_domains) > 0:
if len(new_color_domains) < 10:
......@@ -664,7 +649,7 @@ def SVG(write_html, outputfile, GenBankFile, pfdFile, use_pfd, color_genes, colo
with open(domains_color_file, "a") as color_domains_handle:
for new_names in new_color_domains:
color_domains_handle.write(new_names + "\t" + ",".join(map(str,new_color_domains[new_names])) + "\n")
color_domains_handle.write(new_names + "\t" + ",".join([str(ncdom) for ncdom in new_color_domains[new_names]]) + "\n")
mode = "a" if write_html == True else "w"
......
......@@ -31,6 +31,12 @@ installation using Miniconda
> conda install -c bioconda hmmer biopython mafft
```
* The Affinity Propagation algorithm used currently is [pySAPC](https://pypi.python.org/pypi/pysapc/1.1.0). If you are using a Mac: `conda install -c https://conda.anaconda.org/bioinfocao pysapc`, otherwise: `pip install pysapc`
* The Affinity Propagation algorithm currently used is [pySAPC](https://pypi.python.org/pypi/pysapc/1.1.0). Install Cython first:
(pysapc also installs pandas, cython, pytz, six and python-dateutil)
```
> conda install cython
```
Then, if you are using a Mac, run `conda install -c https://conda.anaconda.org/bioinfocao pysapc`; otherwise, run `pip install pysapc`
(pysapc also installs pandas, pytz, six and python-dateutil)
This diff is collapsed.
......@@ -4015,3 +4015,4 @@ PF05938 207,71,220
PF08082 78,184,113
PF07725 90,147,197
PF01582 219,220,68
PF07678 207,214,67
......@@ -16,6 +16,11 @@ Functions used by bigscape.py
# License: GNU Affero General Public License v3 or later
# A copy of GNU AGPL v3 should have been included in this software package in LICENSE.txt.
"""
# Makes sure the script can be used with Python 2 as well as Python 3.
from __future__ import print_function
from sys import version_info
if version_info[0]==2:
range = xrange
import os
import subprocess
......@@ -62,9 +67,9 @@ def get_anchor_domains(filename):
domains.add(line.strip().split("\t")[0].split(".")[0])
return domains
except IOError:
print "You have not provided the anchor_domains.txt file."
print "if you want to make use of the anchor domains in the DSS distance metric,\
make a file that contains a Pfam domain on each line."
print("You have not provided the anchor_domains.txt file.")
print("if you want to make use of the anchor domains in the DSS distance metric, \
make a file that contains a Pfam domain on each line.")
return set()
......@@ -157,15 +162,6 @@ def write_pfd(pfd_handle, matrix):
pfd_handle.close()
def get_domains(filename):
    """Return the second space-separated field of each non-comment line.

    Lines whose first character is "#" are skipped. Every other line is
    split on single spaces, empty fields (produced by runs of spaces) are
    discarded, and the second remaining field is collected.

    filename: path of the text file to read.

    Returns a list with one entry per non-comment line, in file order.

    Raises IndexError if a non-comment line has fewer than two fields.
    """
    domains = []
    # The context manager closes the handle even if a line fails to parse.
    with open(filename, 'r') as handle:
        for line in handle:
            if line[0] != "#":
                # A list is built explicitly because filter() returns a
                # non-subscriptable iterator on Python 3.
                fields = [field for field in line.split(" ") if field]
                domains.append(fields[1])
    return domains
def no_overlap(locA1, locA2, locB1, locB2):
"""Return True if there is no overlap between two regions"""
......@@ -229,11 +225,9 @@ def save_domain_seqs(filtered_matrix, fasta_dict, domains_folder, outputbase):
header = row[-1].strip()
seq = fasta_dict[header] #access the sequence by using the header
domain_file = open(os.path.join(domains_folder, domain + ".fasta"), 'a') #append to existing file
domain_file.write(">" + header + ":" + row[3] + ":" + row[4] \
+ "\n" + seq[int(row[3]):int(row[4])] + "\n") #only use the range of the pfam domain within the sequence
domain_file.write(">{}:{}:{}\n{}\n".format(header, row[3], row[4],
seq[int(row[3]):int(row[4])])) #only use the range of the pfam domain within the sequence
domain_file.close()
......@@ -430,7 +424,7 @@ def domtable_parser(gbk, dom_file):
else:
for line in dom_handle:
if line[0] != "#":
splitline = filter(None, line.split(" "))
splitline = line.split()
pfd_row = []
pfd_row.append(gbk) #add clustername or gbk filename
......@@ -440,7 +434,7 @@ def domtable_parser(gbk, dom_file):
try:
pfd_row.append(header_list[header_list.index("gid")+1]) #add gene ID if known
except ValueError:
print "No gene ID in ", gbk
print("No gene ID in " + gbk)
pfd_row.append('')
pfd_row.append(splitline[19])#first coordinate, env coord from
......
......@@ -12759,3 +12759,4 @@ Araha.8081s0001 211,252,249
Araha.24309s0001 182,250,217
Araha.24309s0002 221,251,191
Araha.24309s0003 242,205,251
eno 192,245,254
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment