Skip to content
Snippets Groups Projects
Commit 2dad18ce authored by Ninjani's avatar Ninjani
Browse files

working on app

parent aa42f5ae
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python
import base64
import os
import pickle
from pathlib import Path
import dash
import dash_bio as dashbio
import dash_core_components as dcc
import dash_html_components as html
import fire
import flask
import numpy as np
from caretta import helper
from caretta.pfam import PfamToPDB, get_pdbs_from_folder
if not Path("static").exists():
Path("static").mkdir()
external_stylesheets = ["https://cdnjs.cloudflare.com/ajax/libs/skeleton/2.0.4/skeleton.css"]
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
def feature_heatmap(data, zeros=False):
if zeros:
length = 2
z = np.zeros((length, length))
else:
keys = list(data.keys())
length = len(data[keys[0]])
z = np.zeros((len(data), length))
for i in range(len(data)):
for j in range(length):
z[i, j] = data[keys[i]][j]
return dict(data=[dict(z=z, type="heatmap", showscale=False)], layout=dict(margin=dict(l=25, r=25, t=25, b=25)))
def feature_line(features, alignment):
length = len(features[list(features.keys())[0]])
z = np.zeros((len(features), length))
keys = list(features.keys())
for i in range(len(features)):
for j in range(length):
if alignment[keys[i]][j] is not "-":
z[i, j] = features[keys[i]][j]
else:
z[i, j] = np.NaN
y = np.array([np.nanmean(z[:, x]) for x in range(z.shape[1])])
y_se = np.array([np.nanstd(z[:, x]) / np.sqrt(z.shape[1]) for x in range(z.shape[1])])
data = [dict(y=list(y + y_se) + list(y - y_se)[::-1], x=list(range(length)) + list(range(length))[::-1],
fillcolor="lightblue", fill="toself", type="scatter", mode="lines", name="Standard error",
line=dict(color='lightblue')),
dict(y=y, x=np.arange(length), type="scatter", mode="lines", name="Mean",
line=dict(color='blue'))]
return dict(data=data, layout=dict(legend=dict(x=0.5, y=1.2), margin=dict(l=25, r=25, t=25, b=25)))
def structure_plot(coord_dict):
data = []
for k, v in coord_dict.items():
x, y, z = v[:, 0], v[:, 1], v[:, 2]
data.append(dict(
x=x,
y=y,
z=z,
mode='lines',
type='scatter3d',
text=None,
name=str(k),
line=dict(
width=3,
opacity=0.8)))
layout = dict(margin=dict(l=20, r=20, t=20, b=20), clickmode='event+select',
scene=dict(xaxis=dict(visible=False, showgrid=False, showline=False),
yaxis=dict(visible=False, showgrid=False, showline=False),
zaxis=dict(visible=False, showgrid=False, showline=False)))
return dict(data=data, layout=layout)
def check_gap(sequences, i):
for seq in sequences:
if seq[i] == "-":
return True
return False
def get_feature_z(features, alignments):
core_indices = []
sequences = list(alignments.values())
for i in range(len(sequences[0])):
if not check_gap(sequences, i):
core_indices.append(i)
else:
continue
return {x: features[x][np.arange(len(sequences[0]))] for x in features}
def write_as_csv(feature_dict, file_name):
with open(file_name, "w") as f:
for protein_name, features in feature_dict.items():
f.write(";".join([protein_name] + [str(x) for x in list(features)]) + "\n")
def write_as_csv_all_features(feature_dict, file_name):
with open(file_name, "w") as f:
for protein_name, feature_dict in feature_dict.items():
for feature_name, feature_values in feature_dict.items():
f.write(";".join([protein_name, feature_name] + [str(x) for x in list(feature_values)]) + "\n")
box_style = {"box-shadow": "1px 3px 20px -4px rgba(0,0,0,0.75)",
"border-radius": "5px", "background-color": "#f9f7f7"}
box_style_lg = {"top-margin": 25,
"border-style": "solid",
"border-color": "rgb(187, 187, 187)",
"border-width": "1px",
"border-radius": "5px",
"background-color": "#edfdff"}
box_style_lr = {"top-margin": 25,
"border-style": "solid",
"border-color": "rgb(187, 187, 187)",
"border-width": "1px",
"border-radius": "5px",
"background-color": "#ffbaba"}
def compress_object(raw_object):
return base64.b64encode(pickle.dumps(raw_object, protocol=4)).decode("utf-8")
def decompress_object(compressed_object):
return pickle.loads(base64.b64decode(compressed_object))
def protein_to_aln_index(protein_index, aln_seq):
n = 0
for i in range(len(aln_seq)):
if protein_index == n:
return i
elif aln_seq[i] == "-":
pass
else:
n += 1
def aln_index_to_protein(alignment_index, alignment):
res = dict()
for k, v in alignment.items():
if v[alignment_index] == "-":
res[k] = None
else:
res[k] = alignment_index - v[:alignment_index].count("-")
return res
pfam_start = PfamToPDB(from_file=False, limit=100)
pfam_start = list(pfam_start.pfam_to_pdb_ids.keys())
pfam_start = [{"label": x, "value": x} for x in pfam_start]
introduction_text_web = """This is a demo webserver for Caretta. It generates multiple structure alignments for proteins from a selected
Pfam domain and displays the alignment, the superposed proteins, and aligned structural features.
While the server is restricted to a maximum of 50 proteins and 100 pfam domains, you can download this GUI from
https://git.wageningenur.nl/durai001/caretta and run it locally to use it on as many proteins as you'd like.
All the generated data can further be exported for downstream use."""
introduction_text = """Caretta generates multiple structure alignments for a set of input proteins and displays the alignment, the superposed proteins,
and aligned structural features. All the generated data can further be exported for downstream use.
If you have to align more than 100 proteins please use the command-line tool instead (See https://git.wageningenur.nl/durai001/caretta for instructions)."""
pfam_selection_text = """Choose a Pfam ID or select a custom folder and click on Load Structures.
Then use the dropdown box to select which PDB IDs/files to align."""
structure_alignment_text = """Click on a residue to see its position on the feature alignment in the next section."""
feature_alignment_text = """Click on a position in the feature alignment to see the corresponding residues in the previous section."""
app.layout = html.Div(children=[html.Div(html.Div([html.H1("Caretta",
style={"text-align": "center"}),
html.H3("a multiple protein structure alignment and feature extraction suite",
style={"text-align": "center"}),
html.P(introduction_text, style={"text-align": "left"})], className="row"),
className="container"),
html.Div([html.Br(), html.P(children=compress_object(PfamToPDB(from_file=False,
limit=100)), id="pfam-class",
style={"display": "none"}),
html.P(children="", id="feature-data",
style={"display": "none"}),
html.P(children=compress_object(0), id="button1",
style={"display": "none"}),
html.P(children=compress_object(0), id="button2",
style={"display": "none"}),
html.P(children="", id="alignment-data",
style={"display": "none"}),
html.Div([html.H3("Choose Structures", className="row", style={"text-align": "center"}),
html.P(pfam_selection_text, className="row"),
html.Div([html.Div(dcc.Dropdown(placeholder="Choose Pfam ID",
options=pfam_start, id="pfam-ids"), className="four columns"),
html.Button("Load Structures", className="four columns", id="load-button"),
html.Div(
dcc.Input(placeholder="Custom folder", value="", type="text", id="custom-folder"),
className="four columns")], className="row"),
html.Div([html.Div(dcc.Dropdown(placeholder="Gap open penalty (1.0)",
options=[{"label": np.round(x, decimals=2), "value": x} for x in
np.arange(0, 5, 0.1)],
id="gap-open"), className="four columns"),
html.Div(dcc.Dropdown(multi=True, id="structure-selection"),
className="four columns"),
html.Div(dcc.Dropdown(placeholder="Gap extend penalty (0.01)",
options=[{"label": np.round(x, decimals=3), "value": x} for x in
np.arange(0, 1, 0.002)],
id="gap-extend"),
className="four columns")], className="row"),
html.Br(),
html.Div(html.Button("Align Structures", className="twelve columns", id="align"),
className="row"),
dcc.Loading(id="loading-1", children=[html.Div(id="output-1", style={"text-align": "center"})],
type="default")
],
className="container"),
html.Br()], className="container", style=box_style),
html.Br(),
html.Div(children=[html.Br(),
html.H3("Sequence alignment", className="row", style={"text-align": "center"}),
html.Div(
html.P("", className="row"),
className="container"),
html.Div([html.Button("Download alignment", className="row", id="alignment-download"),
html.Div(children="", className="row", id="fasta-download-link")],
className="container"),
html.Div(html.P(id="alignment", className="twelve columns"), className="row")],
className="container", style=box_style),
html.Br(),
html.Div([html.Br(),
html.H3("Structural alignment", className="row", style={"text-align": "center"}),
html.Div(html.P(structure_alignment_text,
className="row"), className="container"),
html.Div(html.Button("Download PDB", className="row", id="pdb", style={"align": "center"}),
className="container"),
html.Div(children=dcc.Graph(figure=feature_heatmap([[0, 0], [0, 0]], zeros=True), id="scatter3d"),
className="row", id="aligned-proteins"), html.Br()],
className="container", style=box_style),
html.Br(),
html.Div(
[html.Br(), html.Div([html.Div([html.H3("Feature alignment", className="row", style={"text-align": "center"}),
html.P(
feature_alignment_text,
className="row"),
dcc.Dropdown(placeholder="Choose a feature", id="feature-selection",
className="six columns"),
html.Button("Display feature alignment", id="feature-button",
className="six columns")], className="row"),
html.Div([html.Div([html.Button("Export feature", id="export"),
html.Button("Export all features", id="export-all")], id="exporter"),
html.Div(html.P(""), id="link-field"),
html.Br()])], className="container"),
html.Div(
html.Div(dcc.Graph(figure=feature_heatmap([[0, 0], [0, 0]], zeros=True), id="feature-line"),
id="feature-plot1"),
className="row"),
html.Div(
html.Div(dcc.Graph(figure=feature_heatmap([[0, 0], [0, 0]], zeros=True), id="heatmap"), id="feature-plot2"),
className="row")],
className="container", style=box_style),
html.Br(), html.Br(), html.Div(id="testi")])
@app.callback(dash.dependencies.Output('fasta-download-link', 'children'),
[dash.dependencies.Input('alignment-download', 'n_clicks')],
[dash.dependencies.State("alignment-data", "children"),
dash.dependencies.State("pfam-class", "children")])
def download_alignment(clicked, data, pfam_data):
if clicked and data and pfam_data:
alignment = decompress_object(data)
if not alignment:
return ""
pfam_class = decompress_object(pfam_data)
fasta = pfam_class.to_fasta_str(alignment)
fnum = np.random.randint(0, 1000000000)
fname = f"static/{fnum}.fasta"
with open(fname, "w") as f:
f.write(fasta)
return html.A(f"Download %s here" % ("alignment" + ".fasta"), href="/%s" % fname)
else:
return ""
@app.callback(dash.dependencies.Output('structure-selection', 'options'),
[dash.dependencies.Input('load-button', 'n_clicks')],
[dash.dependencies.State("pfam-class", "children"),
dash.dependencies.State("pfam-ids", "value"),
dash.dependencies.State("custom-folder", "value")])
def show_selected_atoms(clicked, pfam_class, pfam_id, custom_folder):
if clicked and pfam_class and pfam_id:
pfam_class = decompress_object(pfam_class)
pfam_structures = pfam_class.get_entries_for_pfam(pfam_id)
return [{"label": x.PDB_ID, "value": compress_object(x)} for x in pfam_structures]
elif clicked and pfam_class and custom_folder:
pdb_files = get_pdbs_from_folder(custom_folder)
return [{"label": x.PDB_ID.split("/")[-1], "value": compress_object(x)} for x in pdb_files]
else:
return [{"label": "no selection", "value": "None"}]
@app.callback([dash.dependencies.Output("output-1", "children"),
dash.dependencies.Output("alignment", "children"),
dash.dependencies.Output("aligned-proteins", "children"),
dash.dependencies.Output("feature-data", "children"),
dash.dependencies.Output("feature-selection", "options"),
dash.dependencies.Output("alignment-data", "children")],
[dash.dependencies.Input("align", "n_clicks")],
[dash.dependencies.State("structure-selection", "value"),
dash.dependencies.State("pfam-class", "children"),
dash.dependencies.State("gap-open", "value"),
dash.dependencies.State("gap-extend", "value")])
def align_structures(clicked, pdb_entries, pfam_class, gap_open, gap_extend):
if clicked and pdb_entries and pfam_class:
pfam_class = decompress_object(pfam_class)
pdb_entries = [decompress_object(x) for x in pdb_entries]
if gap_open and gap_extend:
alignment, pdbs, features = pfam_class.multiple_structure_alignment_from_pfam(pdb_entries,
gap_open_penalty=gap_open,
gap_extend_penalty=gap_extend)
else:
alignment, pdbs, features = pfam_class.multiple_structure_alignment_from_pfam(pdb_entries)
pfam_class.msa.superpose(alignment)
fasta = pfam_class.to_fasta_str(alignment)
component = dashbio.AlignmentChart(
id='my-dashbio-alignmentchart',
data=fasta, showconsensus=False, showconservation=False,
overview=None, height=300,
colorscale="hydrophobicity"
)
return "", component, dcc.Graph(figure=structure_plot({s.name: s.coords for s in pfam_class.msa.structures}),
id="scatter3d"), compress_object(
features), [{"label": x, "value": x} for x in features[list(features.keys())[0]]], compress_object(alignment)
else:
return "", "", "", compress_object(np.zeros(0)), [{"label": "no alignment present", "value": "no alignment"}], compress_object(np.zeros(0))
@app.callback([dash.dependencies.Output("feature-plot1", "children"),
dash.dependencies.Output("feature-plot2", "children")],
[dash.dependencies.Input("feature-button", "n_clicks")],
[dash.dependencies.State("feature-selection", "value"),
dash.dependencies.State("feature-data", "children"),
dash.dependencies.State("alignment-data", "children")])
def display_feature(clicked, chosen_feature, feature_dict, aln):
if clicked and chosen_feature and feature_dict:
alignment = decompress_object(aln)
feature_dict = decompress_object(feature_dict)
chosen_feature_dict = {x: feature_dict[x][chosen_feature] for x in feature_dict}
aln_np = {k: helper.aligned_string_to_array(alignment[k]) for k in alignment}
chosen_feature_aln_dict = {x: helper.get_aligned_string_data(aln_np[x], chosen_feature_dict[x]) for x in feature_dict.keys()}
z = get_feature_z(chosen_feature_aln_dict, alignment)
component1 = dcc.Graph(figure=feature_heatmap(z), id="heatmap")
component2 = dcc.Graph(figure=feature_line(z, alignment), id="feature-line")
return component2, component1
else:
return dcc.Graph(figure=feature_heatmap([[0, 0], [0, 0]], zeros=True),
id="feature-line", style={"display": "none"}), dcc.Graph(figure=feature_heatmap([[0, 0], [0, 0]], zeros=True),
id="heatmap", style={"display": "none"})
@app.callback([dash.dependencies.Output("link-field", "children"),
dash.dependencies.Output("exporter", "children")],
[dash.dependencies.Input("export", "n_clicks"),
dash.dependencies.Input("export-all", "n_clicks")],
[dash.dependencies.State("feature-selection", "value"),
dash.dependencies.State("feature-data", "children"),
dash.dependencies.State("alignment-data", "children")])
def write_output(clicked, clicked_all, chosen_feature, feature_dict, aln):
if (clicked and chosen_feature and feature_dict and aln) and not clicked_all:
alignment = decompress_object(aln)
feature_dict = decompress_object(feature_dict)
chosen_feature_dict = {x: feature_dict[x][chosen_feature] for x in feature_dict}
aln_np = {k: helper.aligned_string_to_array(alignment[k]) for k in alignment}
chosen_feature_aln_dict = {x: helper.get_aligned_string_data(aln_np[x], chosen_feature_dict[x]) for x in
feature_dict.keys()}
z = get_feature_z(chosen_feature_aln_dict, alignment)
fnum = np.random.randint(0, 1000000000)
fname = f"static/{fnum}.csv"
write_as_csv(z, fname)
return html.A(f"Download %s here" % (str(fnum) + ".csv"), href="/%s" % fname), [html.Button("Export feature", id="export"),
html.Button("Export all features", id="export-all")]
elif (clicked_all and feature_dict and aln) and not clicked:
feature_dict = decompress_object(feature_dict)
fnum = np.random.randint(0, 1000000000)
fname = f"static/{fnum}.csv"
write_as_csv_all_features(feature_dict, fname)
return html.A(f"Download %s here" % (str(fnum) + ".csv"), href="/%s" % fname), [html.Button("Export feature", id="export"),
html.Button("Export all features", id="export-all")]
else:
return "", [html.Button("Export feature", id="export"),
html.Button("Export all features", id="export-all")]
@app.server.route('/static/<path:path>')
def download_file(path):
root_dir = os.getcwd()
return flask.send_from_directory(
os.path.join(root_dir, 'static'), path)
@app.callback([dash.dependencies.Output("feature-line", "figure"),
dash.dependencies.Output("scatter3d", "figure"),
dash.dependencies.Output("button1", "children"),
dash.dependencies.Output("button2", "children")],
[dash.dependencies.Input("scatter3d", "clickData"),
dash.dependencies.Input("feature-line", "clickData")],
[dash.dependencies.State("feature-line", "figure"),
dash.dependencies.State("scatter3d", "figure"),
dash.dependencies.State("button1", "children"),
dash.dependencies.State("button2", "children"),
dash.dependencies.State("alignment-data", "children")])
def update_features(clickdata_3d, clickdata_feature, feature_data, scatter3d_data, button1, button2, alignment_data):
if feature_data and scatter3d_data and clickdata_feature and compress_object(
(clickdata_feature["points"][0]["pointNumber"], clickdata_feature["points"][0]["curveNumber"])) != button1:
alignment = decompress_object(alignment_data)
number_of_structures = len(alignment)
clickdata = clickdata_feature
idx = clickdata["points"][0]["pointNumber"]
protein_index = clickdata["points"][0]["curveNumber"]
aln_positions = aln_index_to_protein(idx, alignment)
button1 = compress_object((idx, protein_index))
x, y = clickdata["points"][0]["x"], clickdata["points"][0]["y"]
try:
maxim, minim = np.max(feature_data["data"][0]["y"]), np.min(feature_data["data"][0]["y"])
except KeyError:
return feature_data, scatter3d_data, button1, button2
if len(feature_data["data"]) > 2:
feature_data["data"] = feature_data["data"][:-1]
feature_data["data"] += [dict(y=[minim, maxim], x=[idx, idx], type="scatter", mode="lines",
name="selected residue")]
if len(scatter3d_data["data"]) > number_of_structures:
scatter3d_data["data"] = scatter3d_data["data"][:-1]
to_add = []
for i in range(len(scatter3d_data["data"])):
d = scatter3d_data["data"][i]
k = d["name"]
p = aln_positions[k]
if p is not None:
x, y, z = d["x"][p], d["y"][p], d["z"][p]
to_add.append((x, y, z))
else:
continue
scatter3d_data["data"] += [dict(x=[x[0] for x in to_add],
y=[y[1] for y in to_add],
z=[z[2] for z in to_add], type="scatter3d", mode="markers",
name="selected residues")]
return feature_data, scatter3d_data, button1, button2
if feature_data and scatter3d_data and clickdata_3d and compress_object(
(clickdata_3d["points"][0]["pointNumber"], clickdata_3d["points"][0]["curveNumber"])) != button2:
alignment = decompress_object(alignment_data)
number_of_structures = len(alignment)
clickdata = clickdata_3d
idx = clickdata["points"][0]["pointNumber"]
protein_index = clickdata["points"][0]["curveNumber"]
button2 = compress_object((idx, protein_index))
gapped_sequence = list(alignment.values())[protein_index]
aln_index = protein_to_aln_index(idx, gapped_sequence)
x, y, z = clickdata["points"][0]["x"], clickdata["points"][0]["y"], clickdata["points"][0]["z"]
try:
maxim, minim = np.max(feature_data["data"][0]["y"]), np.min(feature_data["data"][0]["y"])
except KeyError:
return feature_data, scatter3d_data, button1, button2
if len(feature_data["data"]) > 2:
feature_data["data"] = feature_data["data"][:-1]
feature_data["data"] += [dict(y=[minim, maxim], x=[aln_index, aln_index], type="scatter", mode="lines",
name="selected_residue")]
if len(scatter3d_data["data"]) > number_of_structures:
scatter3d_data["data"] = scatter3d_data["data"][:-1]
scatter3d_data["data"] += [dict(y=[y], x=[x], z=[z], type="scatter3d", mode="markers",
name="selected residue")]
return feature_data, scatter3d_data, button1, button2
elif feature_data and scatter3d_data:
return feature_data, scatter3d_data, button1, button2
def run_server(host="0.0.0.0", port=8888):
"""
caretta-app is the GUI of caretta, capable of aligning and visualising multiple protein structures
and allowing extraction of aligned features such as bond angles, residue depths and fluctuations.
----------
host
host ip (string)
port
port
"""
app.run_server(host=host, port=port)
if __name__ == '__main__':
fire.Fire(run_server)
#! python3 #!/usr/bin/env python3
from pathlib import Path from pathlib import Path
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment