Skip to content
Snippets Groups Projects
Commit 21b73078 authored by Akdel's avatar Akdel
Browse files

Merge branches 'doc' and 'master' of https://git.wur.nl/durai001/caretta

parents 98fea845 ef979c90
Branches
No related tags found
No related merge requests found
......@@ -14,8 +14,8 @@ import flask
import numpy as np
from cryptography.fernet import Fernet
from caretta import helper
from caretta.pfam import PfamToPDB, get_pdbs_from_folder
from caretta import helper, msa_numba
from caretta.pfam import PdbEntry
key = Fernet.generate_key()
suite = Fernet(key)
......@@ -163,26 +163,16 @@ def aln_index_to_protein(alignment_index, alignment):
return res
pfam_start = PfamToPDB(from_file=False, limit=100)
pfam_start = list(pfam_start.pfam_to_pdb_ids.keys())
pfam_start = [{"label": x, "value": x} for x in pfam_start]
introduction_text_web = """This is the demo web-server for caretta. It generates multiple structure alignments for proteins from a selected
Pfam domain and displays the alignment, the superposed proteins, and aligned structural features.
While the server is restricted to a maximum of 50 proteins and 100 pfam domains, you can download the GUI from
https://git.wageningenur.nl/durai001/caretta and run it locally to use it on as many proteins as you'd like.
All the generated data can further be exported for downstream use."""
introduction_text = """Caretta generates multiple structure alignments for a set of input proteins and displays the alignment, the superposed proteins,
and aligned structural features. All the generated data can further be exported for downstream use.
If you have to align more than 100 proteins please use the command-line tool instead (See https://git.wageningenur.nl/durai001/caretta for instructions)."""
pfam_selection_text = """Choose a Pfam ID or select a custom folder and click on Load Structures.
Then use the dropdown box to select which PDB IDs/files to align."""
pfam_selection_text_web = """Choose a Pfam ID and click on Load Structures.
Then use the dropdown box to select which PDB IDs to align."""
If you have to align more than 100 proteins your browser may lag, please use the command-line tool instead
(See https://git.wageningenur.nl/durai001/caretta for instructions)."""
pdb_selection_text = dcc.Markdown("""Possible input options are:
* Path to a folder containing files
* List of files (one on each line)
* List of PDB IDs
""")
structure_alignment_text = """Click on a residue to see its position on the feature alignment in the next section."""
feature_alignment_text = """Click on a position in the feature alignment to see the corresponding residues in the previous section."""
......@@ -192,9 +182,7 @@ app.layout = html.Div(children=[html.Div(html.Div([html.H1("Caretta",
style={"text-align": "center"}),
html.P(introduction_text, style={"text-align": "left"})], className="row"),
className="container"),
html.Div([html.Br(), html.P(children=compress_object(PfamToPDB(from_file=False,
limit=100)), id="pfam-class",
style={"display": "none"}),
html.Div([html.Br(),
html.P(children="", id="feature-data",
style={"display": "none"}),
html.P(children=compress_object(0), id="button1",
......@@ -204,20 +192,18 @@ app.layout = html.Div(children=[html.Div(html.Div([html.H1("Caretta",
html.P(children="", id="alignment-data",
style={"display": "none"}),
html.Div([html.H3("Choose Structures", className="row", style={"text-align": "center"}),
html.P(pfam_selection_text, className="row"),
html.Div([html.Div(dcc.Dropdown(placeholder="Choose Pfam ID",
options=pfam_start, id="pfam-ids"), className="four columns"),
html.Button("Load Structures", className="four columns", id="load-button"),
html.Div(
dcc.Input(placeholder="Custom folder", value="", type="text", id="custom-folder"),
className="four columns")]
, className="row"),
html.P("Input PDB files and click on Load Structures.", className="row"),
html.Div([
html.Div(
dcc.Textarea(placeholder="PDB files", value="", id="custom-folder", required=True),
className="four columns"),
html.P(pdb_selection_text, className="four columns"),
html.Button("(Re)load Structures", className="four columns", id="load-button")
], className="row"),
html.Div([html.Div(dcc.Dropdown(placeholder="Gap open penalty (1.0)",
options=[{"label": np.round(x, decimals=2), "value": x} for x in
np.arange(0, 5, 0.1)],
id="gap-open"), className="four columns"),
html.Div(dcc.Dropdown(multi=True, id="structure-selection"),
className="four columns"),
html.Div(dcc.Dropdown(placeholder="Gap extend penalty (0.01)",
options=[{"label": np.round(x, decimals=3), "value": x} for x in
np.arange(0, 1, 0.002)],
......@@ -278,17 +264,23 @@ app.layout = html.Div(children=[html.Div(html.Div([html.H1("Caretta",
html.Br(), html.Br(), html.Div(id="testi")])
def to_fasta_str(alignment):
    """
    Render an alignment as FASTA-formatted text.

    Parameters
    ----------
    alignment
        mapping of sequence name -> aligned sequence string

    Returns
    -------
    str with a ">name" header line followed by the sequence line for every
    entry, in the mapping's iteration order, joined by newlines
    (no trailing newline; an empty mapping gives an empty string)
    """
    return "\n".join(
        line
        for name, sequence in alignment.items()
        for line in (f">{name}", sequence)
    )
@app.callback(dash.dependencies.Output('fasta-download-link', 'children'),
[dash.dependencies.Input('alignment-download', 'n_clicks')],
[dash.dependencies.State("alignment-data", "children"),
dash.dependencies.State("pfam-class", "children")])
def download_alignment(clicked, data, pfam_data):
if clicked and data and pfam_data:
[dash.dependencies.State("alignment-data", "children")])
def download_alignment(clicked, data):
if clicked and data:
alignment = decompress_object(data)
if not alignment:
return ""
pfam_class = decompress_object(pfam_data)
fasta = pfam_class.to_fasta_str(alignment)
fasta = to_fasta_str(alignment)
fnum = np.random.randint(0, 1000000000)
fname = f"static/{fnum}.fasta"
with open(fname, "w") as f:
......@@ -298,23 +290,6 @@ def download_alignment(clicked, data, pfam_data):
return ""
@app.callback(dash.dependencies.Output('structure-selection', 'options'),
              [dash.dependencies.Input('load-button', 'n_clicks')],
              [dash.dependencies.State("pfam-class", "children"),
               dash.dependencies.State("pfam-ids", "value"),
               dash.dependencies.State("custom-folder", "value")])
def show_selected_atoms(clicked, pfam_class, pfam_id, custom_folder):
    """
    Fill the "structure-selection" dropdown once "load-button" is clicked.

    Parameters
    ----------
    clicked
        n_clicks of the load button
    pfam_class
        compressed object stored in the hidden "pfam-class" element
    pfam_id
        value of the "pfam-ids" dropdown
    custom_folder
        value of the "custom-folder" input

    Returns
    -------
    list of {"label", "value"} option dicts; each value is the compressed
    entry object. A single "no selection" option is returned when nothing
    can be loaded.
    """
    if clicked and pfam_class:
        if pfam_id:
            # A chosen Pfam ID wins over a custom folder.
            entries = decompress_object(pfam_class).get_entries_for_pfam(pfam_id)
            return [{"label": entry.PDB_ID, "value": compress_object(entry)} for entry in entries]
        if custom_folder:
            entries = get_pdbs_from_folder(custom_folder)
            # Only the last "/"-separated component of the ID is shown as the label.
            return [{"label": entry.PDB_ID.split("/")[-1], "value": compress_object(entry)} for entry in entries]
    return [{"label": "no selection", "value": "None"}]
@app.callback([dash.dependencies.Output("output-1", "children"),
dash.dependencies.Output("alignment", "children"),
dash.dependencies.Output("aligned-proteins", "children"),
......@@ -322,29 +297,28 @@ def show_selected_atoms(clicked, pfam_class, pfam_id, custom_folder):
dash.dependencies.Output("feature-selection", "options"),
dash.dependencies.Output("alignment-data", "children")],
[dash.dependencies.Input("align", "n_clicks")],
[dash.dependencies.State("structure-selection", "value"),
dash.dependencies.State("pfam-class", "children"),
[dash.dependencies.State("custom-folder", "value"),
dash.dependencies.State("gap-open", "value"),
dash.dependencies.State("gap-extend", "value")])
def align_structures(clicked, pdb_entries, pfam_class, gap_open, gap_extend):
if clicked and pdb_entries and pfam_class:
pfam_class = decompress_object(pfam_class)
pdb_entries = [decompress_object(x) for x in pdb_entries]
if gap_open and gap_extend:
alignment, pdbs, features = pfam_class.multiple_structure_alignment_from_pfam(pdb_entries,
gap_open_penalty=gap_open,
gap_extend_penalty=gap_extend)
else:
alignment, pdbs, features = pfam_class.multiple_structure_alignment_from_pfam(pdb_entries)
pfam_class.msa.superpose(alignment)
fasta = pfam_class.to_fasta_str(alignment)
def align_structures(clicked, input_pdb, gap_open, gap_extend):
if clicked and input_pdb:
pdb_entries = [PdbEntry.from_user_input(f) for f in msa_numba.parse_pdb_files(input_pdb)]
if not gap_open:
gap_open = 1
if not gap_extend:
gap_extend = 0.01
msa_class = msa_numba.StructureMultiple.from_pdb_files([p.get_pdb()[1] for p in pdb_entries])
alignment = msa_class.align(gap_open_penalty=gap_open, gap_extend_penalty=gap_extend)
msa_class.superpose(alignment)
fasta = to_fasta_str(alignment)
component = dashbio.AlignmentChart(
id='my-dashbio-alignmentchart',
data=fasta, showconsensus=False, showconservation=False,
overview=None, height=300,
colorscale="hydrophobicity"
)
return "", component, dcc.Graph(figure=structure_plot({s.name: s.coords for s in pfam_class.msa.structures}),
features = {s.name: s.features for s in msa_class.structures}
return "", component, dcc.Graph(figure=structure_plot({s.name: s.coords for s in msa_class.structures}),
id="scatter3d"), compress_object(
features), [{"label": x, "value": x} for x in features[list(features.keys())[0]]], compress_object(alignment)
else:
......@@ -435,7 +409,7 @@ def update_features(clickdata_3d, clickdata_feature, feature_data, scatter3d_dat
protein_index = clickdata["points"][0]["curveNumber"]
aln_positions = aln_index_to_protein(idx, alignment)
button1 = compress_object((idx, protein_index))
x, y = clickdata["points"][0]["x"], clickdata["points"][0]["y"]
# x, y = clickdata["points"][0]["x"], clickdata["points"][0]["y"]
try:
maxim, minim = np.max(feature_data["data"][0]["y"]), np.min(feature_data["data"][0]["y"])
except KeyError:
......@@ -500,9 +474,8 @@ def run_server(host="0.0.0.0", port=8888):
port
port
"""
app.run_server(host=host, port=port)
app.run_server(host=host, port=port, debug=True)
if __name__ == '__main__':
fire.Fire(run_server)
......@@ -197,8 +197,7 @@ def parse_pdb_files(input_pdb: str) -> typing.List[typing.Union[str, Path]]:
with open(input_pdb) as f:
pdb_files = f.read().strip().split('\n')
else:
print("Weird string") # TODO throw error
return []
pdb_files = str(input_pdb).split('\n')
else:
pdb_files = list(input_pdb)
if not Path(pdb_files[0]).is_file():
......
import glob
from dataclasses import dataclass
from pathlib import Path
......@@ -46,15 +45,16 @@ class PdbEntry:
if self.PDB_file is not None:
pdb_obj = pd.parsePDB(self.PDB_file)
else:
# if self.CHAIN_ID == "":
# chain_id = 'A'
pdb_obj = pd.parsePDB(self.PDB_ID, chain=self.CHAIN_ID) # TODO : change mkdir and etc..
if self.PdbResNumStart == -1 and self.PdbResNumEnd == -1:
pdb_obj = pdb_obj.select(f"protein")
else:
pdb_obj = pdb_obj.select(f"protein and resnum {self.PdbResNumStart} : {self.PdbResNumEnd + 1}")
filename = f"{self.PDB_ID}_{self.CHAIN_ID}_{self.PdbResNumStart}.pdb"
pd.writePDB(filename, pdb_obj)
if self.PDB_file is None:
filename = f"{self.PDB_ID}_{self.CHAIN_ID}_{self.PdbResNumStart}.pdb"
pd.writePDB(filename, pdb_obj)
else:
filename = self.PDB_file
return pd.parsePDB(filename), filename
def get_features(self, from_file=None):
......@@ -66,7 +66,10 @@ class PdbEntry:
@property
def filename(self):
    """
    Name of the PDB file associated with this entry.

    Returns
    -------
    ``PDB_file`` when it is set; otherwise the name
    ``{PDB_ID}_{CHAIN_ID}_{PdbResNumStart}.pdb`` built from the entry's fields
    """
    # The PDB_file check must come before the generated name:
    # an unconditional return of the generated name would leave it unreachable.
    if self.PDB_file is not None:
        return self.PDB_file
    return f"{self.PDB_ID}_{self.CHAIN_ID}_{self.PdbResNumStart}.pdb"
@property
def unique_name(self):
......@@ -74,10 +77,10 @@ class PdbEntry:
def get_pdbs_from_folder(path):
    """
    Create one PdbEntry for every ``*.pdb`` file directly inside a folder.

    Parameters
    ----------
    path
        folder to search (str or Path)

    Returns
    -------
    list of PdbEntry objects, one per matching file, each built with
    ``PdbEntry.from_user_input`` from the file's path as a string
    """
    # Single pass over the folder: each file is added exactly once, as a str path.
    return [PdbEntry.from_user_input(str(pdb_path)) for pdb_path in Path(path).glob("*.pdb")]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment