diff --git a/ArrowerSVG.py b/ArrowerSVG.py
index d462bccf79807158ea9171a8384fceea9ca6e6b6..d36710fb984ee2bff457ee53a3d25adae1389fc1 100644
--- a/ArrowerSVG.py
+++ b/ArrowerSVG.py
@@ -8,6 +8,12 @@
 #                heavily modified by Jorge Navarro 2016              #
 ######################################################################
 
+# Makes sure the script can be used with Python 2 as well as Python 3.
+from __future__ import print_function, division
+from sys import version_info
+if version_info[0] == 2:
+    range = xrange
+
 import os
 import sys
 import argparse
@@ -169,15 +175,10 @@ def draw_arrow(additional_tabs, X, Y, L, l, H, h, strand, color, color_contour,
     else:
         color_contour = [50, 50, 50]
         
-    arrow += additional_tabs + "\t\t<polygon "
-    arrow += "class=\"" + gid + "\" "
-    arrow += "points=\"" + " ".join(points_coords) + "\" "
-    arrow += "fill=\"rgb(" + ",".join(map(str,color)) +")\" "
-    arrow += "fill-opacity=\"1.0\" "
-    arrow += "stroke=\"rgb(" + ",".join(map(str,color_contour)) + ")\" "
-    arrow += "stroke-width=\"" + str(gene_contour_thickness) + "\" " 
-    arrow += category + " />\n"
-
+    arrow += "{}\t\t<polygon class=\"{}\" ".format(additional_tabs, gid)
+    arrow += "points=\"{}\" fill=\"rgb({})\" ".format(" ".join(points_coords), ",".join([str(val) for val in color]))
+    arrow += "fill-opacity=\"1.0\" stroke=\"rgb({})\" ".format(",".join([str(val) for val in color_contour]))
+    arrow += "stroke-width=\"{}\" {} />\n".format(str(gene_contour_thickness), category)
     
     # paint domains. Domains on the tip of the arrow should not have corners sticking
     #  out of them
@@ -193,8 +194,7 @@ def draw_arrow(additional_tabs, X, Y, L, l, H, h, strand, color, color_contour,
         dccolour = domain[6]
         
         arrow += additional_tabs + "\t\t<g>\n"
-        arrow += additional_tabs + "\t\t\t<title>" + dname + " (" + dacc + ")\n\"" + ddesc + "\"</title>\n"
-        
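+        # An SVG <title> child is rendered as a hover tooltip by most browsers.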
+        arrow += "{}\t\t\t<title>{} ({})\n\"{}\"</title>\n".format(additional_tabs, dname, dacc, ddesc)
         
         if strand == "+":
             # calculate how far from head_start we (the horizontal guide at y=Y+internal_domain_margin)
@@ -211,16 +211,11 @@ def draw_arrow(additional_tabs, X, Y, L, l, H, h, strand, color, color_contour,
             x_margin_offset = internal_domain_margin/sin(pi - atan2(h+H/2.0,-head_length))
 
             if (dX + dL) < head_start + collision_x - x_margin_offset:
-                arrow += additional_tabs + "\t\t\t<rect class=\"" + dacc + "\" "
-                arrow += "x=\"" + str(X+dX) + "\" "
-                arrow += "y=\"" + str(Y + internal_domain_margin) + "\" "
-                arrow += "stroke-linejoin=\"round\" "
-                arrow += "width=\"" + str(dL) + "\" "
-                arrow += "height=\"" + str(dH) + "\" "
-                arrow += "fill=\"rgb(" + ",".join(map(str,dcolor)) + ")\" "
-                arrow += "stroke=\"rgb(" + ",".join(map(str,dccolour)) + ")\" "
-                arrow += "stroke-width=\"" + str(domain_contour_thickness) + "\" "
-                arrow += "opacity=\"0.75\" />\n"
+                arrow += "{}\t\t\t<rect class=\"{}\" x=\"{}\" ".format(additional_tabs, dacc, str(X+dX))
+                arrow += "y=\"{}\" stroke-linejoin=\"round\" ".format(str(Y + internal_domain_margin))
+                arrow += "width=\"{}\" height=\"{}\" ".format(str(dL), str(dH))
+                arrow += "fill=\"rgb({})\" stroke=\"rgb({})\" ".format(",".join([str(val) for val in dcolor]), ",".join([str(val) for val in dccolour]))
+                arrow += "stroke-width=\"{}\" opacity=\"0.75\" />\n".format(str(domain_contour_thickness))
             else:
                 del points[:]
                 
@@ -263,15 +258,12 @@ def draw_arrow(additional_tabs, X, Y, L, l, H, h, strand, color, color_contour,
                 for point in points:
                     points_coords.append(str(int(point[0])) + "," + str(int(point[1])))
                     
-                arrow += additional_tabs + "\t\t\t<polygon class=\"" + dacc + "\" "
-                arrow += "points=\"" + " ".join(points_coords) + "\" "
-                arrow += "stroke-linejoin=\"round\" "
-                arrow += "width=\"" + str(dL) + "\" "
-                arrow += "height=\"" + str(dH) + "\" "
-                arrow += "fill=\"rgb(" + ",".join(map(str,dcolor)) + ")\" "
-                arrow += "stroke=\"rgb(" + ",".join(map(str,dccolour)) + ")\" "
-                arrow += "stroke-width=\"" + str(domain_contour_thickness) + "\" "
-                arrow += "opacity=\"0.75\" />\n"    
+                arrow += "{}\t\t\t<polygon class=\"{}\" ".format(additional_tabs, dacc)
+                arrow += "points=\"{}\" stroke-linejoin=\"round\" ".format(" ".join(points_coords))
+                arrow += "width=\"{}\" height=\"{}\" ".format(str(dL), str(dH))
+                arrow += "fill=\"rgb({})\" ".format(",".join([str(val) for val in dcolor]))
+                arrow += "stroke=\"rgb({})\" ".format(",".join([str(val) for val in dccolour]))
+                arrow += "stroke-width=\"{}\" opacity=\"0.75\" />\n".format(str(domain_contour_thickness))
             
         # now check other direction
         else:
@@ -286,16 +278,12 @@ def draw_arrow(additional_tabs, X, Y, L, l, H, h, strand, color, color_contour,
             
             # nice, blocky domains
             if dX > collision_x + x_margin_offset:
-                arrow += additional_tabs + "\t\t\t<rect class=\"" + dacc + "\" "
-                arrow += "x=\"" + str(X+dX) + "\" "
-                arrow += "y=\"" + str(Y + internal_domain_margin) + "\" "
-                arrow += "stroke-linejoin=\"round\" "
-                arrow += "width=\"" + str(dL) + "\" "
-                arrow += "height=\"" + str(dH) + "\" "
-                arrow += "fill=\"rgb(" + ",".join(map(str,dcolor)) + ")\" "
-                arrow += "stroke=\"rgb(" + ",".join(map(str,dccolour)) + ")\" "
-                arrow += "stroke-width=\"" + str(domain_contour_thickness) + "\" "
-                arrow += "opacity=\"0.75\" />\n"
+                arrow += "{}\t\t\t<rect class=\"{}\" ".format(additional_tabs, dacc)
+                arrow += "x=\"{}\" y=\"{}\" ".format(str(X+dX), str(Y + internal_domain_margin))
+                arrow += "stroke-linejoin=\"round\" width=\"{}\" height=\"{}\" ".format(str(dL), str(dH))
+                arrow += "fill=\"rgb({})\" ".format(",".join([str(val) for val in dcolor]))
+                arrow += "stroke=\"rgb({})\" ".format(",".join([str(val) for val in dccolour]))
+                arrow += "stroke-width=\"{}\" opacity=\"0.75\" />\n".format(str(domain_contour_thickness))
             else:
                 del points[:]
                 
@@ -329,15 +317,12 @@ def draw_arrow(additional_tabs, X, Y, L, l, H, h, strand, color, color_contour,
                 for point in points:
                     points_coords.append(str(int(point[0])) + "," + str(int(point[1])))
                     
-                arrow += additional_tabs + "\t\t\t<polygon class=\"" + dacc + "\" "
-                arrow += "points=\"" + " ".join(points_coords) + "\" "
-                arrow += "stroke-linejoin=\"round\" "
-                arrow += "width=\"" + str(dL) + "\" "
-                arrow += "height=\"" + str(dH) + "\" "
-                arrow += "fill=\"rgb(" + ",".join(map(str,dcolor)) + ")\" "
-                arrow += "stroke=\"rgb(" + ",".join(map(str,dccolour)) + ")\" "
-                arrow += "stroke-width=\"" + str(domain_contour_thickness) + "\" "
-                arrow += "opacity=\"0.75\" />\n"
+                arrow += "{}\t\t\t<polygon class=\"{}\" ".format(additional_tabs, dacc)
+                arrow += "points=\"{}\" stroke-linejoin=\"round\" ".format(" ".join(points_coords))
+                arrow += "width=\"{}\" height=\"{}\" ".format(str(dL), str(dH))
+                arrow += "fill=\"rgb({})\" ".format(",".join([str(val) for val in dcolor]))
+                arrow += "stroke=\"rgb({})\" ".format(",".join([str(val) for val in dccolour]))
+                arrow += "stroke-width=\"{}\" opacity=\"0.75\" />\n".format(str(domain_contour_thickness))
         
         arrow += additional_tabs + "\t\t</g>\n"
     
@@ -351,7 +336,7 @@ def draw_line(X,Y,L):
     Draw a line below genes
     """
     
-    line = "<line x1=\"" + str(X) + "\" y1=\"" + str(Y) + "\" x2=\"" + str(X+L) + "\" y2=\"" + str(Y) + "\" style=\"stroke:rgb(50,50,50); stroke-width:" + str(stripe_thickness) + " \"/>\n"
+    line = "<line x1=\"{}\" y1=\"{}\" x2=\"{}\" y2=\"{}\" style=\"stroke:rgb(50,50,50); stroke-width:{} \"/>\n".format(str(X), str(Y), str(X+L), str(Y), str(stripe_thickness))
     
     return line
 
@@ -426,7 +411,7 @@ def SVG(write_html, outputfile, GenBankFile, pfdFile, use_pfd, color_genes, colo
         header = "\t\t<div title=\"" + GenBankFile[:-4] + "\">\n"
         additional_tabs = "\t\t\t"
         
-        header += additional_tabs + "<svg width=\"" + str(max_width + 2*(mX)) + "\" height=\"" + str(loci*(2*h + H + 2*mY)) + "\">\n"
+        header += "{}<svg width=\"{}\" height=\"{}\">\n".format(additional_tabs, str(max_width + 2*(mX)), str(loci*(2*h + H + 2*mY)))
 
         addY = loci*(2*h + H + 2*mY)
     else:
@@ -654,7 +639,7 @@ def SVG(write_html, outputfile, GenBankFile, pfdFile, use_pfd, color_genes, colo
             
         with open(gene_color_file, "a") as color_genes_handle:
             for new_names in new_color_genes:
-                color_genes_handle.write(new_names + "\t" + ",".join(map(str,new_color_genes[new_names])) + "\n")
+                color_genes_handle.write(new_names + "\t" + ",".join([str(ncg) for ncg in new_color_genes[new_names]]) + "\n")
     
     if len(new_color_domains) > 0:
         if len(new_color_domains) < 10:
@@ -664,7 +649,7 @@ def SVG(write_html, outputfile, GenBankFile, pfdFile, use_pfd, color_genes, colo
             
         with open(domains_color_file, "a") as color_domains_handle:
             for new_names in new_color_domains:
-                color_domains_handle.write(new_names + "\t" + ",".join(map(str,new_color_domains[new_names])) + "\n")
+                color_domains_handle.write(new_names + "\t" + ",".join([str(ncdom) for ncdom in new_color_domains[new_names]]) + "\n")
 
     
     mode = "a" if write_html == True else "w"
diff --git a/bigscape.py b/bigscape.py
index 33f4d70fa19ccc2a028473bd7a3b4d1054746feb..da027e27bb5288ccf2954a2a0c2fa376b143fa7e 100644
--- a/bigscape.py
+++ b/bigscape.py
@@ -27,8 +27,16 @@ https://git.wageningenur.nl/medema-group/BiG-SCAPE
 # License: GNU Affero General Public License v3 or later
 # A copy of GNU AGPL v3 should have been included in this software package in LICENSE.txt.
 """
+# Makes sure the script can be used with Python 2 as well as Python 3.
+from __future__ import print_function
+from __future__ import division
+from sys import version_info
+if version_info[0] == 2:
+    range = xrange
+    import cPickle as pickle # for storing and retrieving dictionaries
+else:
+    import pickle # for storing and retrieving dictionaries
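+# cPickle is the faster C implementation of pickle; both expose the same interface.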
 
-import cPickle as pickle  # for storing and retrieving dictionaries
 from math import exp, log
 import os
 import subprocess
@@ -47,12 +55,12 @@ from Bio import pairwise2
 from Bio.SubsMat.MatrixInfo import pam250 as scoring_matrix
 
 from functions import *
-from munkres import Munkres
 from ArrowerSVG import *
 
 import numpy as np
 from array import array
 from scipy.sparse import lil_matrix
+from scipy.optimize import linear_sum_assignment
 import pysapc
 
 
@@ -100,7 +108,7 @@ def get_gbk_files(inputdir, bgc_fasta_folder, min_bgc_size, exclude_gbk_str, bgc
                 print(" Skipping file " + fname)
                 continue
             if "_ORF" in fname:
-                print(" Skipping file " + fname + " (string '_ORF' is used internally)")
+                print(" Skipping file {} (string '_ORF' is used internally)".format(fname))
                 continue
             
             if " " in fname:
@@ -117,7 +125,7 @@ def get_gbk_files(inputdir, bgc_fasta_folder, min_bgc_size, exclude_gbk_str, bgc
                 # basic file verification. Substitutes check_data_integrity
                 records = list(SeqIO.parse(os.path.join(dirpath,fname), "genbank"))
             except ValueError as e:
-                print("   Error with file " + os.path.join(dirpath, fname) + ": \n    '" + str(e) + "'")
+                print("   Error with file {}: \n    '{}'".format(os.path.join(dirpath, fname), str(e)))
                 print("    (This file will be excluded from the analysis)")
                 continue
             else:
@@ -169,8 +177,7 @@ def get_gbk_files(inputdir, bgc_fasta_folder, min_bgc_size, exclude_gbk_str, bgc
                                 strand = '+'
                             else:
                                 strand = '-'
-                                
-                            fasta_header = clusterName + "_ORF" + str(cds_ctr)+ ":gid:" + str(gene_id) + ":pid:" + str(protein_id) + ":loc:" + str(gene_start) + ":" + str(gene_end) + ":strand:" + strand
+                            fasta_header = "{}_ORF{}:gid:{}:pid:{}:loc:{}:{}:strand:{}".format(clusterName, cds_ctr, gene_id, protein_id, gene_start, gene_end, strand)
                             fasta_header = fasta_header.replace(">","") #the coordinates might contain larger than signs, tools upstream don't like this
                             fasta_header = fasta_header.replace(" ", "") #the domtable output format (hmmscan) uses spaces as a delimiter, so these cannot be present in the fasta header
 
@@ -215,7 +222,7 @@ def get_gbk_files(inputdir, bgc_fasta_folder, min_bgc_size, exclude_gbk_str, bgc
                                     reminder = len(nt_seq)%3
                                     if reminder > 0:
                                         if fuzzy_start and fuzzy_end:
-                                            print("Warning, CDS (" + clusterName + ", " + CDS.qualifiers.get('locus_tag',"")[0] + ") has fuzzy start and end positions, and a sequence length not multiple of three. Skipping")
+                                            print("Warning, CDS ({}, {}) has fuzzy start and end positions, and a sequence length not multiple of three. Skipping".format(clusterName, CDS.qualifiers.get('locus_tag',"")[0]))
                                             break
                                         
                                         if fuzzy_start:
@@ -289,15 +296,15 @@ def get_gbk_files(inputdir, bgc_fasta_folder, min_bgc_size, exclude_gbk_str, bgc
                         else:
                             with open(outputfile,'w') as fastaHandle:
                                 for header_sequence in fasta_data:
-                                    fastaHandle.write('%s\n' % header_sequence[0])
-                                    fastaHandle.write('%s\n' % header_sequence[1])
+                                    fastaHandle.write("{}\n".format(header_sequence[0]))
+                                    fastaHandle.write("{}\n".format(header_sequence[1]))
                             
                             
                     if verbose:
-                        print("  Adding " + fname + " (" + str(bgc_size) + " bps)")
+                        print("  Adding {} ({} bps)".format(fname, str(bgc_size)))
                         
                 else:
-                    print(" Discarding " + clusterName +  " (size less than " + str(min_bgc_size) + " bp, was " + str(bgc_size) + ")")
+                    print(" Discarding {} (size less than {} bp, was {})".format(clusterName, str(min_bgc_size), str(bgc_size)))
                 
                 del fasta_data[:]
                 biosynthetic_genes.clear()
@@ -308,29 +315,34 @@ def get_gbk_files(inputdir, bgc_fasta_folder, min_bgc_size, exclude_gbk_str, bgc
     if file_counter == 1:
         sys.exit("\nError: Only one file found. Please input at least two files")
     
-    print("\n Starting with " + str(file_counter) + " files")
+    print("\n Starting with {} files".format(str(file_counter)))
     print(" Files that had its sequence extracted: " + str(file_counter - processed_sequences))
 
     return genbankDict
 
 
-def timeit(f):
-    def wrap(*args):
-        insignificant_runtime = 1 #prevents an overload 
-        time1 = time.time()
-        ret = f(*args)
-        time2 = time.time()
-        runtime = time2-time1
-        
-        runtime_string = '\t%s function took %0.3f s' % (f.func_name, runtime)
-        
-        if runtime > insignificant_runtime:
+def timeit(funct):
+    """Writes the runtimes of functions to a file and prints them on screen.
+
+    Input:
+    - funct: a function
+    Output:
+    - That function its output.
+    - Runtime of that function in a file called commands.txt and on screen.
+    """
+    def _wrap(*args):
+        start_time = time.time()
+        ret = funct(*args)
+        runtime = time.time()-start_time
+        runtime_string = '{} took {:.3f} seconds'.format(funct.__name__, runtime)
+        # To prevent insignificant runtimes from ending up in the file.
+        if runtime > 1:
             with open(os.path.join(output_folder, "runtimes.txt"), 'a') as timings_file:
                 timings_file.write(runtime_string + "\n")
-            print runtime_string
-            
+            print(runtime_string)
         return ret
-    return wrap
+
+    return _wrap
 
 
 @timeit
@@ -357,7 +369,7 @@ def generate_network(cluster_pairs, cores):
 def generate_dist_matrix(parms):
     """Unpack data to actually launch cluster_distance for one pair of BGCs"""
     
-    cluster1Idx,cluster2Idx,bgcClassIdx = map(int,parms)
+    cluster1Idx, cluster2Idx, bgcClassIdx = [int(parm) for parm in parms]
     cluster1 = clusterNames[cluster1Idx]
     cluster2 = clusterNames[cluster2Idx]
     bgc_class = bgcClassNames[bgcClassIdx]
@@ -366,7 +378,7 @@ def generate_dist_matrix(parms):
         domain_list_A = DomainList[cluster1]
         domain_list_B = DomainList[cluster2]
     except KeyError:
-        print(" Warning: domain list for " + cluster1 + " or " + cluster2 + " was not found. Extracting from pfs files")
+        print(" Warning: domain list for {} or {} was not found. Extracting from pfs files".format(cluster1, cluster2))
         
         cluster_file1 = os.path.join(output_folder, cluster1 + ".pfs")
         cluster_file2 = os.path.join(output_folder, cluster2 + ".pfs")
@@ -376,13 +388,13 @@ def generate_dist_matrix(parms):
     
     # this really shouldn't happen if we've filtered domain-less gene clusters already
     if len(domain_list_A) == 0 or len(domain_list_B) == 0:
-        print("   Warning: Regarding distance between clusters " + cluster1 + " and " + cluster2 + ":")
+        print("   Warning: Regarding distance between clusters {} and {}:".format(cluster1, cluster2))
         if len(domain_list_A) == 0 and len(domain_list_B) == 0:
             print("   None have identified domains. Distance cannot be calculated")
         elif (domain_list_A) == 0:            
-            print("   Cluster " + cluster1 + " has no identified domains. Distance set to 1")
+            print("   Cluster {} has no identified domains. Distance set to 1".format(cluster1))
         else:
-            print("   Cluster " + cluster2 + " has no identified domains. Distance set to 1")
+            print("   Cluster {} has no identified domains. Distance set to 1".format(cluster2))
             
         # last two values (S, Sa) should really be zero but this could give rise to errors when parsing 
         # the network file (unless we catched the case S = Sa = 0
@@ -414,7 +426,7 @@ def cluster_distance(a, b, a_domlist, b_domlist, bgc_class):
     
     setA = set(A_domlist)
     setB = set(B_domlist)
-    intersect = setA.intersection(setB)
+    intersect = setA & setB
     
     S = 0
     S_anchor = 0
@@ -505,10 +517,9 @@ def cluster_distance(a, b, a_domlist, b_domlist, bgc_class):
             B_domain_sequence_slice_top[domain] = len(BGCs[B][domain])
         
     
-    
     # JACCARD INDEX
-    Jaccard = len(intersect)/ float( len(setA) + len(setB) - len(intersect))
-
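+    # len(setA | setB) == len(setA) + len(setB) - len(intersect), so this is the standard Jaccard denominator.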
+    Jaccard = len(intersect) / len(setA | setB)
+
 
     # DSS INDEX
     #domain_difference: Difference in sequence per domain. If one cluster does
@@ -541,9 +552,10 @@ def cluster_distance(a, b, a_domlist, b_domlist, bgc_class):
                     
     S = domain_difference # can be done because it's the first use of these
     S_anchor = domain_difference_anchor
-        
+
     # Cases 2 and 3 (now merged)
     missing_aligned_domain_files = []
+
     for shared_domain in intersect:
         specific_domain_list_A = BGCs[A][shared_domain]
         specific_domain_list_B = BGCs[B][shared_domain]
@@ -556,8 +568,7 @@ def cluster_distance(a, b, a_domlist, b_domlist, bgc_class):
         accumulated_distance = 0
             
         # Fill distance matrix between domain's A and B versions
-        DistanceMatrix = [[1 for col in range(num_copies_b)] for row in range(num_copies_a)]
-        
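+        # Start every pairwise distance at 1 (the maximum) so unassigned cells stay maximally distant.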
+        DistanceMatrix = np.ones((num_copies_a, num_copies_b))
         for domsa in range(num_copies_a):
             for domsb in range(num_copies_b):
                 sequence_tag_a = specific_domain_list_A[domsa + A_domain_sequence_slice_bottom[shared_domain]]
@@ -577,7 +588,7 @@ def cluster_distance(a, b, a_domlist, b_domlist, bgc_class):
                     if shared_domain not in missing_aligned_domain_files and verbose:
                         # this will print everytime an unfound <domain>.algn is not found for every
                         # distance calculation (but at least, not for every domain pair!)
-                        print("  Warning: " + shared_domain + ".algn not found. Trying pairwise alignment...")
+                        print("  Warning: {}.algn not found. Trying pairwise alignment...".format(shared_domain))
                         missing_aligned_domain_files.append(shared_domain)
                     
                     try:
@@ -603,9 +614,9 @@ def cluster_distance(a, b, a_domlist, b_domlist, bgc_class):
                 # Sequences *should* be of the same length unless something went
                 # wrong elsewhere
                 if len(aligned_seqA) != len(aligned_seqB):
-                    print("\tWARNING: mismatch in sequences' lengths while calculating sequence identity (" + shared_domain + ")")
-                    print("\t  Specific domain 1: " + sequence_tag_a + " len: " + str(len(aligned_seqA)))
-                    print("\t  Specific domain 2: " + sequence_tag_b + " len: " + str(len(aligned_seqB)))
+                    print("\tWARNING: mismatch in sequences' lengths while calculating sequence identity ({})".format(shared_domain))
+                    print("\t  Specific domain 1: {} len: {}".format(sequence_tag_a, str(len(aligned_seqA))))
+                    print("\t  Specific domain 2: {} len: {}".format(sequence_tag_b, str(len(aligned_seqB))))
                     seq_length = min(len(aligned_seqA), len(aligned_seqB))
                 else:
                     seq_length = len(aligned_seqA)
@@ -617,17 +628,16 @@ def cluster_distance(a, b, a_domlist, b_domlist, bgc_class):
                         else:
                             gaps += 1
                             
-                DistanceMatrix[domsa][domsb] = 1 - ( float(matches)/float(seq_length-gaps) )
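+                # Convert sequence identity (ignoring gap columns) into a distance in [0, 1].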
+                DistanceMatrix[domsa][domsb] = 1 - (matches/(seq_length-gaps))
                 
         #Only use the best scoring pairs
-        Hungarian = Munkres()
-        BestIndexes = Hungarian.compute(DistanceMatrix)
-        accumulated_distance = sum([DistanceMatrix[bi[0]][bi[1]] for bi in BestIndexes])
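+        # linear_sum_assignment returns (row_indices, col_indices) for the optimal
+        # pairing; indexing the matrix with that tuple selects the assigned cells.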
+        BestIndexes = linear_sum_assignment(DistanceMatrix)
+        accumulated_distance = DistanceMatrix[BestIndexes].sum()
         
         # the difference in number of domains accounts for the "lost" (or not duplicated) domains
         sum_seq_dist = (abs(num_copies_a-num_copies_b) + accumulated_distance)  #essentially 1-sim
         normalization_element = max(num_copies_a, num_copies_b)
-            
+
         if shared_domain[:7] in anchor_domains:
             S_anchor += normalization_element
             domain_difference_anchor += sum_seq_dist
@@ -636,12 +646,12 @@ def cluster_distance(a, b, a_domlist, b_domlist, bgc_class):
             domain_difference += sum_seq_dist
             
     if S_anchor != 0 and S != 0:
-        DSS_non_anchor = domain_difference / float(S)
-        DSS_anchor = domain_difference_anchor / float(S_anchor)
+        DSS_non_anchor = domain_difference / S
+        DSS_anchor = domain_difference_anchor / S_anchor
         
         # Calculate proper, proportional weight to each kind of domain
-        non_anchor_prct = S / float(S + S_anchor)
-        anchor_prct = S_anchor / float(S + S_anchor)
+        non_anchor_prct = S / (S + S_anchor)
+        anchor_prct = S_anchor / (S + S_anchor)
         
         # boost anchor subcomponent and re-normalize
         non_anchor_weight = non_anchor_prct / (anchor_prct*anchorboost + non_anchor_prct)
@@ -651,19 +661,19 @@ def cluster_distance(a, b, a_domlist, b_domlist, bgc_class):
         DSS = (non_anchor_weight*DSS_non_anchor) + (anchor_weight*DSS_anchor)
         
     elif S_anchor == 0:
-        DSS_non_anchor = domain_difference / float(S)
+        DSS_non_anchor = domain_difference / S
         DSS_anchor = 0.0
-        
+
         DSS = DSS_non_anchor
         
     else: #only anchor domains were found
         DSS_non_anchor = 0.0
-        DSS_anchor = domain_difference_anchor / float(S_anchor)
+        DSS_anchor = domain_difference_anchor / S_anchor
         
         DSS = DSS_anchor
  
     DSS = 1-DSS #transform into similarity
- 
+
 
     # ADJACENCY INDEX
     # calculates the Tanimoto similarity of pairs of adjacent domains
@@ -680,7 +690,7 @@ def cluster_distance(a, b, a_domlist, b_domlist, bgc_class):
             setB_pairs.add(tuple(sorted([B_domlist[l],B_domlist[l+1]])))
 
         # same treatment as in Jaccard
-        AI = float(len(setA_pairs.intersection(setB_pairs))) / float(len(setA_pairs.union(setB_pairs)))
+        AI = len(setA_pairs & setB_pairs) / len(setA_pairs | setB_pairs)
 
     Distance = 1 - (Jaccardw * Jaccard) - (DSSw * DSS) - (AIw * AI)
     
@@ -689,9 +699,9 @@ def cluster_distance(a, b, a_domlist, b_domlist, bgc_class):
         if Distance < -0.000001: # this definitely is something else...
             print("Negative distance detected!")
             print(Distance)
-            print(A + " - " + B)
-            print("J: " + str(Jaccard) + "\tDSS: " + str(DSS) + "\tAI: " + str(AI))
-            print("Jw: " + str(Jaccardw) + "\tDSSw: " + str(DSSw) + "\tAIw: " + str(AIw))
+            print("{} - {}".format(A, B))
+            print("J: {}\tDSS: {}\tAI: {}".format(str(Jaccard), str(DSS), str(AI)))
+            print("Jw: {}\tDSSw: {}\tAIw: {}".format(str(Jaccardw), str(DSSw), str(AIw)))
         Distance = 0.0
         
     return Distance, Jaccard, DSS, AI, DSS_non_anchor, DSS_anchor, S, S_anchor
@@ -798,7 +808,7 @@ def runHmmScan(fastaPath, hmmPath, outputdir, verbose):
         name = ".".join(fastaPath.split(os.sep)[-1].split(".")[:-1])
         outputName = os.path.join(outputdir, name+".domtable")
         
-        hmmscan_cmd = "hmmscan --cpu 0 --domtblout %s --cut_tc %s %s" % (outputName,hmmFile,fastaPath)
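+        # --cpu 0 disables hmmscan's internal threading; parallelism comes from the worker pool instead.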
+        hmmscan_cmd = "hmmscan --cpu 0 --domtblout {} --cut_tc {} {}".format(outputName, hmmFile, fastaPath)
         if verbose == True:
             print("   " + hmmscan_cmd)
         subprocess.check_output(hmmscan_cmd, shell=True)
@@ -825,17 +835,17 @@ def parseHmmScan(hmmscanResults, pfd_folder, pfs_folder, overlapCutoff):
             
             # Save list of domains per BGC
             pfsoutput = os.path.join(pfs_folder, outputbase + ".pfs")
-            with open(pfsoutput, 'wb') as pfs_handle:
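+            # pfs/pfd files are plain text; Python 3 rejects str writes on handles opened in binary mode.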
+            with open(pfsoutput, 'w') as pfs_handle:
                 pfs_handle.write(" ".join(domains))
             
             # Save more complete information of each domain per BGC
             pfdoutput = os.path.join(pfd_folder, outputbase + ".pfd")
-            with open(pfdoutput,'wb') as pfd_handle:
+            with open(pfdoutput,'w') as pfd_handle:
                 write_pfd(pfd_handle, filtered_matrix)
         else:
             # there aren't any domains in this BGC
             # delete from all data structures
-            print("  No domains where found in " + outputbase + ".domtable. Removing it from further analysis")
+            print("  No domains where found in {}.domtable. Removing it from further analysis".format(outputbase))
             info = genbankDict.get(outputbase)
             clusters.remove(outputbase)
             baseNames.remove(outputbase)
@@ -890,8 +900,7 @@ def clusterJsonBatch(outputFileBase,matrix,cutoffs=[1.0],damping=0.8,clusterClan
         if verbose:
             print("   ...done")
         numBGCs = len(bgcs)
-        bs_distances = [[float('%.3f' % simMatrix[row, col]) for col in xrange(row+1)] for row in
-                        xrange(numBGCs)]
+        bs_distances = [[float("{:.3f}".format(simMatrix[row, col])) for col in range(row+1)] for row in range(numBGCs)]
         #bs_data = [{"id": clusterNames[int(bgc)]} for bgc in bgcs]
         bs_data = []
         bgcJsonDict = {}
@@ -900,12 +909,12 @@ def clusterJsonBatch(outputFileBase,matrix,cutoffs=[1.0],damping=0.8,clusterClan
             members = familiesDict.setdefault(label, [])
             members.append(idx)
             familiesDict[label] = members
-
+
         ## guarantee order of families and indices for later assignment
         # Families should be indexed from 0 to # families - 1
         familiesDictReIdxd = {idx:members for idx,members in enumerate(familiesDict.values())}
         familyIdxs = range(len(familiesDictReIdxd))
-
+
         ### Use the 0.5 distance cutoff to cluster clans by default
         if clusterClans and cutoff == clanCutoff:
             famSimDict = dict()
@@ -938,7 +947,7 @@ def clusterJsonBatch(outputFileBase,matrix,cutoffs=[1.0],damping=0.8,clusterClan
                                 preference='min').fit_predict(famSimMatrix)
         else:
             clanLabels = []
-    
+
         if len(clanLabels) > 0:
             clansDict = {}
             for idx,clanLabel in enumerate(clanLabels):
@@ -946,7 +955,7 @@ def clusterJsonBatch(outputFileBase,matrix,cutoffs=[1.0],damping=0.8,clusterClan
                 clanMembers.append(idx)
                 clansDict[clanLabel] = clanMembers
             fam2clan = dict(zip(familyIdxs,clanLabels))
-
+
         for bgc in bgcs:
             bgcName = clusterNames[int(bgc)]
             bgcJsonDict[bgcName] = {}
@@ -984,22 +993,22 @@ def clusterJsonBatch(outputFileBase,matrix,cutoffs=[1.0],damping=0.8,clusterClan
                     orfDict[orf]["domains"].append({'code': '{} : {}'.format(pfamID,pfamDescr),'start':int(entry[3]),'end':int(entry[4]),'bitscore': float(entry[1])})
                 else:
                     orfDict[orf]["domains"].append({'code': entry[5], 'start': int(entry[3]), 'end': int(entry[4]), 'bitscore': float(entry[1])})
-            bgcJsonDict[bgcName]['orfs'] = orfDict.values()
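+            # dict.values() returns a view in Python 3; materialise it so it serialises as a plain list.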
+            bgcJsonDict[bgcName]['orfs'] = list(orfDict.values())
         bs_data = [bgcJsonDict[clusterNames[int(bgc)]] for bgc in bgcs]
-
+
         if len(clanLabels) > 0:
-            bs_families = [{'id': 'FAM_%.3d' % family, 'members': members, 'clan': 'CLAN_%.3d' % fam2clan[family]}
+            bs_families = [{"id": "FAM_{:03d}".format(family), 'members': members, "clan": "CLAN_{:03d}".format(fam2clan[family])}
                            for family, members in familiesDictReIdxd.items()]
-            bs_clans = [{'id': 'CLAN_%.3d' % clan, 'members': members}
+            bs_clans = [{"id": "CLAN_{:03d}".format(clan), 'members': members}
                            for clan, members in enumerate(clansDict.values())]
         else:
-            bs_families = [{'id': 'FAM_%.3d' % family, 'members': members, }
+            bs_families = [{"id": "FAM_{:03d}".format(family), 'members': members, }
                            for family, members in familiesDictReIdxd.items()]
-
+
         # column1: BGC, column2: clustering pseudo family
         if verbose:
             print("  Writing clustering file")
-        with open(outputFileBase + "_clustering_c" + str(cutoff) + ".tsv", "w") as clustering_file:
+        with open("{}_clustering_c{:4.2f}.tsv".format(outputFileBase, cutoff), "w") as clustering_file:
             i = 0
             for label in familiesDictReIdxd:
                 i += 1
@@ -1008,13 +1017,13 @@ def clusterJsonBatch(outputFileBase,matrix,cutoffs=[1.0],damping=0.8,clusterClan
 
         if verbose:
             print("  Writing JS file")
-        outputFile = "{}_cutoff{}.js".format(outputFileBase,cutoff)
+        outputFile = "{}_cutoff{:4.2f}.js".format(outputFileBase,cutoff)
         with open(outputFile, 'w') as outfile:
-            outfile.write('var bs_similarity=%s\n' % str(bs_distances))
-            outfile.write('var bs_data=%s\n' % str(bs_data))
-            outfile.write('var bs_families=%s\n' % str(bs_families))
+            outfile.write("var bs_similarity={}\n".format(str(bs_distances)))
+            outfile.write("var bs_data={}\n".format(str(bs_data)))
+            outfile.write("var bs_families={}".format(str(bs_families)))
             if len(clanLabels) > 0:
-                outfile.write('var bs_clans=%s' % str(bs_clans))
+                outfile.write("\nvar bs_clans={}\n".format(str(bs_clans)))
     return
 
 class FloatRange(object):
@@ -1024,7 +1033,7 @@ class FloatRange(object):
     def __eq__(self, other):
         return self.start <= other <= self.end
     def __repr__(self):
-        return '{0}-{1}'.format(self.start, self.end)
+        return '{}-{}'.format(self.start, self.end)
 
 def CMD_parser():
     parser = ArgumentParser()
@@ -1119,7 +1128,7 @@ if __name__=="__main__":
     
     
     if options.outputdir == "":
-        print "please provide a name for an output folder using parameter -o or --outputdir"
+        print("please provide a name for an output folder using parameter -o or --outputdir")
         sys.exit(0)
     
     global anchor_domains
@@ -1221,7 +1230,7 @@ if __name__=="__main__":
     
     sampleDict = {} # {sampleName:set(bgc1,bgc2,...)}
     gbk_files = [] # raw list of gbk file locations
-    for (cluster, (path, clusterSample)) in genbankDict.iteritems():
+    for (cluster, (path, clusterSample)) in genbankDict.items():
         gbk_files.append(path)
         for sample in clusterSample:
             clustersInSample = sampleDict.get(sample, set())
@@ -1243,7 +1252,7 @@ if __name__=="__main__":
     create_directory(pfd_folder, "pfd", False)
     create_directory(svg_folder, "SVG", False)
 
-    print("\nTrying threading on %i cores" % cores)
+    print("\nTrying threading on {} cores".format(str(cores)))
     
     """BGCs -- 
     dictionary of this structure:
@@ -1326,27 +1335,23 @@ if __name__=="__main__":
             outputfile = os.path.join(domtable_folder,outputbase + '.domtable')
             if os.path.isfile(outputfile) and os.path.getsize(outputfile) > 0:
                 alreadyDone.add(fasta)
-            
-        if len(fastaFiles - alreadyDone) == 0:
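+        # Compute the pending set once; it drives both the reporting and the hmmscan pool below.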
+        task_set = fastaFiles - alreadyDone
+        if len(task_set) == 0:
             print(" All fasta files had already been processed")
         elif len(alreadyDone) > 0:
-            if len(fastaFiles-alreadyDone) < 20:
-                print " Warning! The following NEW fasta file(s) will be processed: %s" % ", ".join(".".join(x.split(os.sep)[-1].split(".")[:-1]) for x in fastaFiles - alreadyDone)
+            if len(task_set) < 20:
+                print(" Warning! The following NEW fasta file(s) will be processed: {}".format(", ".join(".".join(x.split(os.sep)[-1].split(".")[:-1]) for x in task_set)))
             else:
-                print(" Warning: " + str(len(fastaFiles-alreadyDone)) + " NEW fasta files will be processed")
+                print(" Warning: {} NEW fasta files will be processed".format(str(len(task_set))))
         else:
-            print(" Predicting domains for " + str(len(fastaFiles)) + " fasta files")
-
-        task_set = fastaFiles - alreadyDone
+            print(" Predicting domains for {} fasta files".format(str(len(fastaFiles))))
         
     pool = Pool(cores,maxtasksperchild=1)
     for fastaFile in task_set:
         pool.apply_async(runHmmScan,args=(fastaFile, pfam_dir, domtable_folder, verbose))
     pool.close()
     pool.join()
-
-    print " Finished generating domtable files."
-
+    print(" Finished generating domtable files.")
 
     ### Step 3: Parse hmmscan domtable results and generate pfs and pfd files
     print("\nParsing hmmscan domtable files")
@@ -1374,16 +1379,16 @@ if __name__=="__main__":
             outputfile = os.path.join(pfd_folder, outputbase + '.pfd')
             if os.path.isfile(outputfile) and os.path.getsize(outputfile) > 0:
                 alreadyDone.add(domtable)
-                
-    if len(domtableFiles - alreadyDone) == 0: # Re-run
+    domtableFilesUnprocessed = domtableFiles - alreadyDone
+    if len(domtableFilesUnprocessed) == 0: # Re-run
         print(" All domtable files had already been processed")
     elif len(alreadyDone) > 0: # Incomplete run
-        if len(domtableFiles-alreadyDone) < 20:
-            print " Warning! The following domtable files had not been processed: %s" % ", ".join(".".join(x.split(os.sep)[-1].split(".")[:-1]) for x in domtableFiles - alreadyDone)
+        if len(domtableFilesUnprocessed) < 20:
+            print(" Warning! The following domtable files had not been processed: {}".format(", ".join(".".join(x.split(os.sep)[-1].split(".")[:-1]) for x in domtableFilesUnprocessed)))
         else:
-            print(" Warning: " + str(len(domtableFiles-alreadyDone)) + " domtable files will be processed")
+            print(" Warning: {} domtable files will be processed".format(str(len(domtableFilesUnprocessed))))
     else: # First run
-        print(" Processing " + str(len(domtableFiles)) + " domtable files")
+        print(" Processing {} domtable files".format(str(len(domtableFiles))))
 
     # If using the multiprocessing version and outputbase doesn't have any
     #  predicted domains, it's not as easy to remove if from the analysis
@@ -1411,7 +1416,7 @@ if __name__=="__main__":
         for thing in os.listdir(domains_folder):
             os.remove(os.path.join(domains_folder,thing))
 
-    print " Finished generating generating pfs and pfd files."
+    print(" Finished generating generating pfs and pfd files.")
     
 
     ### Step 4: Parse the pfs, pfd files to generate BGC dictionary, clusters, and clusters per sample objects
@@ -1451,7 +1456,7 @@ if __name__=="__main__":
                 print("   Processing: " + outputbase)
 
             pfdFile = os.path.join(pfd_folder, outputbase + ".pfd")
-            filtered_matrix = [map(lambda x: x.strip(), line.split('\t')) for line in open(pfdFile)]
+            filtered_matrix = [[part.strip() for part in line.split('\t')] for line in open(pfdFile)]
 
             # save each domain sequence from a single BGC in its corresponding file
             fasta_file = os.path.join(bgc_fasta_folder, outputbase + ".fasta")
@@ -1469,7 +1474,7 @@ if __name__=="__main__":
             del filtered_matrix[:]
             
         # store processed BGCs dictionary for future re-runs
-        with open(os.path.join(output_folder, "BGCs.dict"), "w") as BGC_file:
+        with open(os.path.join(output_folder, "BGCs.dict"), "wb") as BGC_file:
             pickle.dump(BGCs, BGC_file)
             BGC_file.close()
 
@@ -1580,7 +1585,7 @@ if __name__=="__main__":
                 # avoid multiple alignment if the domains all belong to the same BGC
                 fasta_domains.remove(domain_file)
                 if verbose:
-                    print(" Skipping Multiple Alignment for " + domain_name + " (appears only in one BGC)")
+                    print(" Skipping Multiple Alignment for {} (appears only in one BGC)".format(domain_name))
         
         sequence_tag_list.clear()
         del header_list[:]
@@ -1603,7 +1608,7 @@ if __name__=="__main__":
             # verify all tasks were completed by checking existance of alignment files
             for domain in fasta_domains:
                 if not os.path.isfile(domain[:-6]+".algn"):
-                    print("   WARNING, " + domain[:-6] + ".algn could not be found (possible issue with aligner).")
+                    print("   WARNING, {}.algn could not be found (possible issue with aligner).".format(domain[:-6]))
                        
         else:
             print(" No domain fasta files found to align")
@@ -1654,8 +1659,7 @@ if __name__=="__main__":
                     list_file.write("\t".join([bgc, bgc_info[bgc].accession_id, bgc_info[bgc].description, product, sort_bgc(product)]) + "\n")
             
             print("  Calculating all pairwise distances")
-            
-            pairs = set(map(tuple, map(sorted, combinations(mix_set, 2))))
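+            # Each unordered pair is stored as a sorted tuple so (a, b) and (b, a) collapse into one entry.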
+            pairs = {tuple(sorted(combo)) for combo in combinations(mix_set, 2)}
             del mix_set[:]
             cluster_pairs = [(x, y, -1) for (x, y) in pairs]
             pairs.clear()
@@ -1666,9 +1670,9 @@ if __name__=="__main__":
             pathBase = os.path.join(output_folder, networks_folder_all, "all_mix")
             filenames = []
             for cutoff in cutoff_list:
-                filenames.append(pathBase + "_c%.2f.network" % cutoff)
+                filenames.append("{}_c{:.2f}.network".format(pathBase, cutoff))
             clusterJsonBatch(pathBase, network_matrix_mix, cutoffs=cutoff_list,clusterClans=options.cluster_family,clanCutoff=options.clan_cutoff)
-            cutoffs_and_filenames = zip(cutoff_list, filenames)
+            cutoffs_and_filenames = list(zip(cutoff_list, filenames))
             del filenames[:]
             write_network_matrix(network_matrix_mix, cutoffs_and_filenames, include_singletons, clusterNames, bgc_info)
             
@@ -1714,7 +1718,7 @@ if __name__=="__main__":
             for bgc_class in BGC_classes:
                 folder_name = bgc_class
                     
-                print("\n  " + folder_name + " (" + str(len(BGC_classes[bgc_class])) + " BGCs)")
+                print("\n  {} ({} BGCs)".format(folder_name, str(len(BGC_classes[bgc_class]))))
                 
                 # create output directory   
                 create_directory(os.path.join(output_folder, networks_folder_all, folder_name), "  All - " + bgc_class, False)
@@ -1732,7 +1736,7 @@ if __name__=="__main__":
                     
                 if len(BGC_classes[bgc_class]) > 1:
                     print("   Calculating all pairwise distances")
-                    pairs = set(map(tuple, map(sorted, combinations(BGC_classes[bgc_class], 2))))
+                    pairs = {tuple(sorted(combo)) for combo in combinations(BGC_classes[bgc_class], 2)}
                     del BGC_classes[bgc_class][:]
                     cluster_pairs = [(x, y, bgcClassName2idx[bgc_class]) for (x, y) in pairs]
                     pairs.clear()
@@ -1743,11 +1747,11 @@ if __name__=="__main__":
                     pathBase = os.path.join(output_folder, networks_folder_all, folder_name, "all" + folder_name)
                     filenames = []
                     for cutoff in cutoff_list:
-                        filenames.append(pathBase + "_c%.2f.network" % cutoff)
-                    cutoffs_and_filenames = zip(cutoff_list, filenames)
+                        filenames.append("{}_c{:.2f}.network".format(pathBase, cutoff))
+                    cutoffs_and_filenames = list(zip(cutoff_list, filenames))
                     del filenames[:]
                     clusterJsonBatch(pathBase, network_matrix, cutoffs=cutoff_list,clusterClans=options.cluster_family,clanCutoff=options.clan_cutoff)
-                    write_network_matrix(network_matrix, cutoffs_and_filenames, include_singletons,clusterNames, bgc_info)
+                    write_network_matrix(network_matrix, cutoffs_and_filenames, include_singletons, clusterNames, bgc_info)
                     
                     del network_matrix[:]
                 
@@ -1764,10 +1768,10 @@ if __name__=="__main__":
             # create output directory for all samples
             create_directory(os.path.join(output_folder, networks_folder_samples), "Samples", False)
             
-            for sample, sampleClusters in sampleDict.iteritems():
+            for sample, sampleClusters in sampleDict.items():
                 print("\n Sample: " + sample)
                 if len(sampleClusters) == 1:
-                    print(" Warning: Sample size = 1 detected. Not generating network for this sample (" + sample + ")")
+                    print(" Warning: Sample size = 1 detected. Not generating network for this sample ({})".format(sample))
                 else:
                     # create output directory for this sample
                     create_directory(os.path.join(output_folder, networks_folder_samples, sample), " Samples - " + sample, False)
@@ -1793,7 +1797,7 @@ if __name__=="__main__":
                                 product = bgc_info[bgc].product
                                 list_file.write("\t".join([bgc, bgc_info[bgc].accession_id, bgc_info[bgc].description, product, sort_bgc(product)]) + "\n")
             
-                        pairs = set(map(tuple, map(sorted, combinations(mix_set, 2))))
+                        pairs = {tuple(sorted(combo)) for combo in combinations(mix_set, 2)}
                         del mix_set[:]
                         cluster_pairs = [(x, y, -1) for (x, y) in pairs]
                         pairs.clear()
@@ -1804,10 +1808,10 @@ if __name__=="__main__":
                         pathBase = os.path.join(output_folder, networks_folder_samples, sample, "sample_" + sample + "_mix")
                         filenames = []
                         for cutoff in cutoff_list:
-                            filenames.append(pathBase + "_c%.2f.network" % cutoff)
-                        cutoffs_and_filenames = zip(cutoff_list, filenames)
+                            filenames.append("{}_c{:.2f}.network".format(pathBase, cutoff))
+                        cutoffs_and_filenames = list(zip(cutoff_list, filenames))
                         clusterJsonBatch(pathBase, network_matrix_sample, cutoffs=cutoff_list,clusterClans=options.cluster_family,clanCutoff=options.clan_cutoff)
-                        write_network_matrix(network_matrix_sample, cutoffs_and_filenames, include_singletons, clusterNames,bgc_info)
+                        write_network_matrix(network_matrix_sample, cutoffs_and_filenames, include_singletons, clusterNames, bgc_info)
                         
                         del network_matrix_sample[:]
                     
@@ -1866,7 +1870,7 @@ if __name__=="__main__":
                                     list_file.write("\t".join([bgc, bgc_info[bgc].accession_id, bgc_info[bgc].description, product, sort_bgc(product)]) + "\n")
 
                             if len(BGC_classes[bgc_class]) > 1:
-                                pairs = set(map(tuple, map(sorted, combinations(BGC_classes[bgc_class], 2))))
+                                pairs = {tuple(sorted(combo)) for combo in combinations(BGC_classes[bgc_class], 2)}
                                 del BGC_classes[bgc_class][:]
                                 cluster_pairs = [(x, y, bgcClassName2idx[bgc_class]) for (x, y) in pairs]
                                 pairs.clear()
@@ -1877,18 +1881,18 @@ if __name__=="__main__":
                                                         "sample_" + sample + "_" + folder_name)
                                 filenames = []
                                 for cutoff in cutoff_list:
-                                    filenames.append(pathBase + "_c%.2f.network" % cutoff)
-                                cutoffs_and_filenames = zip(cutoff_list, filenames)
+                                    filenames.append("{}_c{:.2f}.network".format(pathBase, cutoff))
+                                cutoffs_and_filenames = list(zip(cutoff_list, filenames))
                                 clusterJsonBatch(pathBase, network_matrix_sample, cutoffs=cutoff_list,clusterClans=options.cluster_family,clanCutoff=options.clan_cutoff)
                                 write_network_matrix(network_matrix_sample, cutoffs_and_filenames, include_singletons, clusterNames,bgc_info)
                                 del network_matrix_sample[:]
                                 
                             del BGC_classes[bgc_class][:]
 
-    pickle.dump(bgc_info,open(os.path.join(output_folder,'bgc_info.dict'),'w'))
+    pickle.dump(bgc_info,open(os.path.join(output_folder,'bgc_info.dict'),'wb'))
     runtime = time.time()-time1
-    runtime_string = '\n\n\tMain function took %0.3f s' % (runtime)
+    runtime_string = "\n\n\tMain function took {:.3f} s".format(runtime)
     with open(os.path.join(output_folder, "runtimes.txt"), 'a') as timings_file:
         timings_file.write(runtime_string + "\n")
-    print runtime_string
+    print(runtime_string)
     
diff --git a/functions.py b/functions.py
index 2610b0f22e39c9690b4671a80bbb7d28c68b32fa..f8f0eed83ec825dc102a1d611480769a646c956b 100644
--- a/functions.py
+++ b/functions.py
@@ -16,6 +16,11 @@ Functions used by bigscape.py
 # License: GNU Affero General Public License v3 or later
 # A copy of GNU AGPL v3 should have been included in this software package in LICENSE.txt.
 """
+# Makes sure the script can be used with Python 2 as well as Python 3.
+from __future__ import print_function
+from sys import version_info
+if version_info[0] == 2:
+    range = xrange
 
 import os
 import subprocess
@@ -62,9 +67,9 @@ def get_anchor_domains(filename):
                     domains.add(line.strip().split("\t")[0].split(".")[0])
         return domains
     except IOError:
-        print "You have not provided the anchor_domains.txt file."
-        print "if you want to make use of the anchor domains in the DSS distance metric,\
-        make a file that contains a Pfam domain on each line."
+        print("You have not provided the anchor_domains.txt file.")
+        print("if you want to make use of the anchor domains in the DSS distance metric, \
+make a file that contains a Pfam domain on each line.")
         return set()
         
 
@@ -157,15 +162,6 @@ def write_pfd(pfd_handle, matrix):
     pfd_handle.close() 
     
 
-def get_domains(filename):
-    handle = open(filename, 'r')
-    domains = []
-    for line in handle:
-        if line[0] != "#":
-            domains.append(filter(None, line.split(" "))[1])
-            
-    return domains
-
 
 def no_overlap(locA1, locA2, locB1, locB2):
     """Return True if there is no overlap between two regions"""
@@ -229,11 +225,9 @@ def save_domain_seqs(filtered_matrix, fasta_dict, domains_folder, outputbase):
         header = row[-1].strip()
         seq = fasta_dict[header] #access the sequence by using the header
         
-
         domain_file = open(os.path.join(domains_folder, domain + ".fasta"), 'a') #append to existing file
-        domain_file.write(">" + header + ":" + row[3] + ":" + row[4] \
-        + "\n" + seq[int(row[3]):int(row[4])] + "\n") #only use the range of the pfam domain within the sequence
-            
+        domain_file.write(">{}:{}:{}\n{}\n".format(header, row[3], row[4],
+            seq[int(row[3]):int(row[4])])) #only use the range of the pfam domain within the sequence
         domain_file.close()
 
 
@@ -430,7 +424,7 @@ def domtable_parser(gbk, dom_file):
     else:
         for line in dom_handle:        
             if line[0] != "#":
-                splitline = filter(None, line.split(" "))
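+                # split() with no arguments drops empty fields, replacing the filter(None, ...) idiom.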
+                splitline = line.split()
                 pfd_row = []
                 pfd_row.append(gbk)         #add clustername or gbk filename
                 
@@ -440,7 +434,7 @@ def domtable_parser(gbk, dom_file):
                 try:
                     pfd_row.append(header_list[header_list.index("gid")+1]) #add gene ID if known
                 except ValueError:
-                    print "No gene ID in ", gbk
+                    print("No gene ID in " + gbk)
                     pfd_row.append('')
                     
                 pfd_row.append(splitline[19])#first coordinate, env coord from