Commit 06eb4b94 authored by Ninjani's avatar Ninjani
Browse files

pdb download

parents 59879f11 55708cdd
BSD 3-Clause License
Copyright (c) 2019, Janani Durairaj, Mehmet Akdel
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
......@@ -24,16 +24,11 @@ cd caretta
### Install both the command-line interface and the web-application:
```bash
pip install -e ".[GUI]"
cd bin
chmod +x caretta-cli
chmod +x caretta-app
```
### Install only the command-line interface:
```bash
pip install .
cd bin
chmod +x caretta-cli
```
### Environment variables:
......@@ -47,14 +42,14 @@ export NUMBA_NUM_THREADS=20 # change to required number of threads
### Command-line Usage
```bash
./caretta-cli input_pdb_folder
# e.g. ./caretta-cli ../test_data
caretta-cli input_pdb_folder
# e.g. caretta-cli test_data
# caretta-cli -h for more options
```
### Web-application Usage
```bash
./caretta-app <host-ip> <port>
caretta-app <host-ip> <port>
# e.g. caretta-app localhost 8091
```
......@@ -474,7 +474,7 @@ def run_server(host="0.0.0.0", port=8888):
port
port
"""
app.run_server(host=host, port=port, debug=True)
app.run_server(host=host, port=port)
if __name__ == '__main__':
......
#!/usr/bin/env python
#!/usr/bin/env python3
import base64
import os
import pickle
from pathlib import Path
from zipfile import ZipFile
import dash
import dash_bio as dashbio
......@@ -182,12 +183,15 @@ feature_alignment_text = """Click on a position in the feature alignment to see
app.layout = html.Div(children=[html.Div(html.Div([html.H1("Caretta",
style={"text-align": "center"}),
html.H3("a multiple protein structure alignment and feature extraction suite",
style={"text-align": "center"}),
html.P(introduction_text_web, style={"text-align": "left"})], className="row"),
html.H3(
"a multiple protein structure alignment and feature extraction suite",
style={"text-align": "center"}),
html.P(introduction_text_web, style={"text-align": "left"})],
className="row"),
className="container"),
html.Div([html.Br(), html.P(children=compress_object(PfamToPDB(from_file=False,
limit=100)), id="pfam-class",
limit=100)),
id="pfam-class",
style={"display": "none"}),
html.P(children="", id="feature-data",
style={"display": "none"}),
......@@ -197,76 +201,102 @@ app.layout = html.Div(children=[html.Div(html.Div([html.H1("Caretta",
style={"display": "none"}),
html.P(children="", id="alignment-data",
style={"display": "none"}),
html.Div([html.H3("Choose Structures", className="row", style={"text-align": "center"}),
html.Div([html.H3("Choose Structures", className="row",
style={"text-align": "center"}),
html.P(pfam_selection_text_web, className="row"),
html.Div([html.Div(dcc.Dropdown(placeholder="Choose Pfam ID",
options=pfam_start, id="pfam-ids"), className="four columns"),
html.Button("Load Structures", className="four columns", id="load-button"),
options=pfam_start, id="pfam-ids"),
className="four columns"),
html.Button("Load Structures", className="four columns",
id="load-button"),
# html.Div(
# dcc.Input(placeholder="Custom folder", value="", type="text", id="custom-folder"),
# className="four columns")],
], className="row"),
html.Div([html.Div(dcc.Dropdown(placeholder="Gap open penalty (1.0)",
options=[{"label": np.round(x, decimals=2), "value": x} for x in
np.arange(0, 5, 0.1)],
id="gap-open"), className="four columns"),
html.Div(dcc.Dropdown(multi=True, id="structure-selection"),
className="four columns"),
html.Div(dcc.Dropdown(placeholder="Gap extend penalty (0.01)",
options=[{"label": np.round(x, decimals=3), "value": x} for x in
np.arange(0, 1, 0.002)],
id="gap-extend"),
className="four columns")], className="row"),
html.Div(
[html.Div(dcc.Dropdown(placeholder="Gap open penalty (1.0)",
options=[
{"label": np.round(x, decimals=2),
"value": x} for x in
np.arange(0, 5, 0.1)],
id="gap-open"),
className="four columns"),
html.Div(dcc.Dropdown(multi=True, id="structure-selection"),
className="four columns"),
html.Div(dcc.Dropdown(placeholder="Gap extend penalty (0.01)",
options=[
{"label": np.round(x, decimals=3),
"value": x} for x in
np.arange(0, 1, 0.002)],
id="gap-extend"),
className="four columns")], className="row"),
html.Br(),
html.Div(html.Button("Align Structures", className="twelve columns", id="align"),
html.Div(html.Button("Align Structures", className="twelve columns",
id="align"),
className="row"),
dcc.Loading(id="loading-1", children=[html.Div(id="output-1", style={"text-align": "center"})],
dcc.Loading(id="loading-1", children=[
html.Div(id="output-1", style={"text-align": "center"})],
type="default")
],
className="container"),
html.Br()], className="container", style=box_style),
html.Br(),
html.Div(children=[html.Br(),
html.H3("Sequence alignment", className="row", style={"text-align": "center"}),
html.H3("Sequence alignment", className="row",
style={"text-align": "center"}),
html.Div(
html.P("", className="row"),
className="container"),
html.Div([html.Button("Download alignment", className="row", id="alignment-download"),
html.Div(children="", className="row", id="fasta-download-link")],
html.Div([html.Button("Download alignment", className="row",
id="alignment-download"),
html.Div(children="", className="row",
id="fasta-download-link")],
className="container"),
html.Div(html.P(id="alignment", className="twelve columns"), className="row")],
html.Div(html.P(id="alignment", className="twelve columns"),
className="row")],
className="container", style=box_style),
html.Br(),
html.Div([html.Br(),
html.H3("Structural alignment", className="row", style={"text-align": "center"}),
html.H3("Structural alignment", className="row",
style={"text-align": "center"}),
html.Div(html.P(structure_alignment_text,
className="row"), className="container"),
html.Div(html.Button("Download PDB", className="row", id="pdb", style={"align": "center"}),
html.Div([html.Button("Download PDB", className="row", id="pdb",
style={"align": "center"}),
html.Div(children="", className="row",
id="pdb-download-link")],
className="container"),
html.Div(children=dcc.Graph(figure=feature_heatmap([[0, 0], [0, 0]], zeros=True), id="scatter3d"),
className="row", id="aligned-proteins"), html.Br()],
html.Div(
children=dcc.Graph(figure=feature_heatmap([[0, 0], [0, 0]], zeros=True),
id="scatter3d"),
className="row", id="aligned-proteins"), html.Br()],
className="container", style=box_style),
html.Br(),
html.Div(
[html.Br(), html.Div([html.Div([html.H3("Feature alignment", className="row", style={"text-align": "center"}),
html.P(
feature_alignment_text,
className="row"),
dcc.Dropdown(placeholder="Choose a feature", id="feature-selection",
className="six columns"),
html.Button("Display feature alignment", id="feature-button",
className="six columns")], className="row"),
html.Div([html.Div([html.Button("Export feature", id="export"),
html.Button("Export all features", id="export-all")], id="exporter"),
html.Div(html.P(""), id="link-field"),
html.Br()])], className="container"),
[html.Br(), html.Div([html.Div(
[html.H3("Feature alignment", className="row", style={"text-align": "center"}),
html.P(
feature_alignment_text,
className="row"),
dcc.Dropdown(placeholder="Choose a feature", id="feature-selection",
className="six columns"),
html.Button("Display feature alignment", id="feature-button",
className="six columns")], className="row"),
html.Div(
[html.Div([html.Button("Export feature", id="export"),
html.Button("Export all features",
id="export-all")], id="exporter"),
html.Div(html.P(""), id="link-field"),
html.Br()])], className="container"),
html.Div(
html.Div(dcc.Graph(figure=feature_heatmap([[0, 0], [0, 0]], zeros=True), id="feature-line"),
html.Div(dcc.Graph(figure=feature_heatmap([[0, 0], [0, 0]], zeros=True),
id="feature-line"),
id="feature-plot1"),
className="row"),
html.Div(
html.Div(dcc.Graph(figure=feature_heatmap([[0, 0], [0, 0]], zeros=True), id="heatmap"), id="feature-plot2"),
html.Div(dcc.Graph(figure=feature_heatmap([[0, 0], [0, 0]], zeros=True),
id="heatmap"), id="feature-plot2"),
className="row")],
className="container", style=box_style),
html.Br(), html.Br(), html.Div(id="testi")])
......@@ -287,7 +317,29 @@ def download_alignment(clicked, data, pfam_data):
fname = f"static/{fnum}.fasta"
with open(fname, "w") as f:
f.write(fasta)
return html.A(f"Download %s here" % ("alignment" + ".fasta"), href="/%s" % fname)
return html.A(f"Download %s here" % ("alignment" + ".fasta"), href="/caretta/%s" % fname)
else:
return ""
@app.callback(dash.dependencies.Output('pdb-download-link', 'children'),
[dash.dependencies.Input('pdb', 'n_clicks')],
[dash.dependencies.State("alignment-data", "children"),
dash.dependencies.State("pfam-class", "children")])
def download_pdb(clicked, data, pfam_data):
if clicked and data and pfam_data:
alignment = decompress_object(data)
if not alignment:
return ""
pfam_class = decompress_object(pfam_data)
fnum = np.random.randint(0, 1000000000)
pfam_class.msa.output_files.pdb_folder = f"static/{fnum}_pdb"
pfam_class.msa.write_files(write_pdb=True, write_fasta=False, write_class=False, write_features=False)
pdb_zip_file = ZipFile(f"static/{fnum}_pdb.zip", mode="w")
for pdb_file in Path(pfam_class.msa.output_files.pdb_folder).glob("*.pdb"):
pdb_zip_file.write(str(pdb_file))
return html.A(f"Download %s here" % ("pdbs" + ".zip"), href="/caretta/%s" % f"static/{fnum}_pdb.zip")
else:
return ""
......@@ -340,9 +392,11 @@ def align_structures(clicked, pdb_entries, pfam_class, gap_open, gap_extend):
)
return "", component, dcc.Graph(figure=structure_plot({s.name: s.coords for s in pfam_class.msa.structures}),
id="scatter3d"), compress_object(
features), [{"label": x, "value": x} for x in features[list(features.keys())[0]]], compress_object(alignment)
features), [{"label": x, "value": x} for x in features[list(features.keys())[0]]], compress_object(
alignment)
else:
return "", "", "", compress_object(np.zeros(0)), [{"label": "no alignment present", "value": "no alignment"}], compress_object(np.zeros(0))
return "", "", "", compress_object(np.zeros(0)), [
{"label": "no alignment present", "value": "no alignment"}], compress_object(np.zeros(0))
@app.callback([dash.dependencies.Output("feature-plot1", "children"),
......@@ -357,15 +411,17 @@ def display_feature(clicked, chosen_feature, feature_dict, aln):
feature_dict = decompress_object(feature_dict)
chosen_feature_dict = {x: feature_dict[x][chosen_feature] for x in feature_dict}
aln_np = {k: helper.aligned_string_to_array(alignment[k]) for k in alignment}
chosen_feature_aln_dict = {x: helper.get_aligned_string_data(aln_np[x], chosen_feature_dict[x]) for x in feature_dict.keys()}
chosen_feature_aln_dict = {x: helper.get_aligned_string_data(aln_np[x], chosen_feature_dict[x]) for x in
feature_dict.keys()}
z = get_feature_z(chosen_feature_aln_dict, alignment)
component1 = dcc.Graph(figure=feature_heatmap(z), id="heatmap")
component2 = dcc.Graph(figure=feature_line(z, alignment), id="feature-line")
return component2, component1
else:
return dcc.Graph(figure=feature_heatmap([[0, 0], [0, 0]], zeros=True),
id="feature-line", style={"display": "none"}), dcc.Graph(figure=feature_heatmap([[0, 0], [0, 0]], zeros=True),
id="heatmap", style={"display": "none"})
id="feature-line", style={"display": "none"}), dcc.Graph(
figure=feature_heatmap([[0, 0], [0, 0]], zeros=True),
id="heatmap", style={"display": "none"})
@app.callback([dash.dependencies.Output("link-field", "children"),
......@@ -387,21 +443,23 @@ def write_output(clicked, clicked_all, chosen_feature, feature_dict, aln):
fnum = np.random.randint(0, 1000000000)
fname = f"static/{fnum}.csv"
write_as_csv(z, fname)
return html.A(f"Download %s here" % (str(fnum) + ".csv"), href="/%s" % fname), [html.Button("Export feature", id="export"),
html.Button("Export all features", id="export-all")]
return html.A(f"Download %s here" % (str(fnum) + ".csv"), href="/caretta/%s" % fname), [
html.Button("Export feature", id="export"),
html.Button("Export all features", id="export-all")]
elif (clicked_all and feature_dict and aln) and not clicked:
feature_dict = decompress_object(feature_dict)
fnum = np.random.randint(0, 1000000000)
fname = f"static/{fnum}.csv"
write_as_csv_all_features(feature_dict, fname)
return html.A(f"Download %s here" % (str(fnum) + ".csv"), href="/%s" % fname), [html.Button("Export feature", id="export"),
html.Button("Export all features", id="export-all")]
return html.A(f"Download %s here" % (str(fnum) + ".csv"), href="/caretta/%s" % fname), [
html.Button("Export feature", id="export"),
html.Button("Export all features", id="export-all")]
else:
return "", [html.Button("Export feature", id="export"),
html.Button("Export all features", id="export-all")]
@app.server.route('/static/<path:path>')
@app.server.route('/caretta/static/<path:path>')
def download_file(path):
root_dir = os.getcwd()
return flask.send_from_directory(
......
import multiprocessing
import subprocess
from pathlib import Path
import numpy as np
......@@ -112,14 +111,15 @@ def get_features(pdb_file: str, dssp_dir: str, only_dssp=True, force_overwrite=T
"""
pdb_file = str(pdb_file)
_, name, _ = helper.get_file_parts(pdb_file)
protein = pd.parsePDB(pdb_file).select("protein")
# if Path(pdb_file).suffix != ".pdb":
pdb_file = str(Path(dssp_dir) / f"{name}.pdb")
pd.writePDB(pdb_file, protein)
protein = pd.parsePDB(pdb_file)
if Path(pdb_file).suffix != ".pdb":
pdb_file = str(Path(dssp_dir) / f"{name}.pdb")
pd.writePDB(pdb_file, protein)
dssp_file = Path(dssp_dir) / f"{name}.dssp"
if force_overwrite or not dssp_file.exists():
dssp_file = pd.execDSSP(str(pdb_file), outputname=name, outputdir=str(dssp_dir))
protein = pd.parseDSSP(dssp=dssp_file, ag=protein, parseall=True)
protein = pd.parseDSSP(dssp=str(dssp_file), ag=protein, parseall=True)
data = get_dssp_features(protein)
if only_dssp:
return data
......@@ -175,153 +175,3 @@ def get_dssp_features(protein_dssp):
return data
def get_electrostatics(protein: pd.AtomGroup, pdb_file: str, es_dir: str, overwrite=False):
"""
Gets born and coulomb electrostatics for given protein
Parameters
----------
protein
pdb_file
es_dir
overwrite
Returns
-------
dict of born/coulomb _ Energy/x-force/y-force/z-force _ ca/cb/mean/min/max
"""
es_dir = str(es_dir)
pdb_file = str(pdb_file)
data_born = _get_electrostatics(protein, pdb_file, es_dir, es_type="born", overwrite=overwrite)
data_coulomb = _get_electrostatics(protein, pdb_file, es_dir, es_type="coulomb", overwrite=overwrite)
return {**data_born, **data_coulomb}
def _run_electrostatics(pdb_file, es_dir: str, es_type: str, overwrite=False) -> tuple:
"""
Run apbs-born / apbs-coulomb on protein
NOTE: born is 0-indexed and coulomb is 1-indexed
Parameters
----------
pdb_file
es_dir
es_type
overwrite
Returns
-------
(es_file, pqr_file, add)
"""
assert es_type == "born" or es_type == "coulomb"
_, name, _ = helper.get_file_parts(pdb_file)
pqr_file = Path(es_dir) / f"{name}.pqr"
if not pqr_file.exists():
pdb2pqr(pdb_file, pqr_file)
es_file = Path(es_dir) / f"{name}_{es_type}.txt"
if es_type == "born":
add = 0
if overwrite or not es_file.exists():
apbs_born(str(pqr_file), str(es_file))
else:
add = 1
if overwrite or not es_file.exists():
apbs_coulomb(str(pqr_file), str(es_file))
return es_file, pqr_file, add
def _get_electrostatics(protein: pd.AtomGroup, pdb_file: str, es_dir: str, es_type: str = "born", overwrite=False):
"""
Run apbs-born / apbs-coulomb on protein
NOTE: born is 0-indexed and coulomb is 1-indexed
Parameters
----------
protein
pdb_file
es_dir
es_type
overwrite
Returns
-------
dict of born/coulomb _ Energy/x-force/y-force/z-force _ ca/cb/mean/ #min/max#
"""
if not Path(pdb_file).exists():
pd.writePDB(pdb_file, protein)
es_file, pqr_file, add = _run_electrostatics(pdb_file, es_dir, es_type, overwrite)
pqr_protein = pd.parsePQR(str(pqr_file))
residue_splits = helper.group_indices(pqr_protein.getResindices())
values, value_types = parse_electrostatics_file(es_file)
data = {}
for value_type in value_types:
data[f"{es_type}_{value_type}_ca"] = np.array(
[values[index + add][value_type] if value_type in values[index + add] else 0 for index in
helper.get_alpha_indices(pqr_protein)])
data[f"{es_type}_{value_type}_cb"] = np.array(
[values[index + add][value_type] if value_type in values[index + add] else 0 for index in
helper.get_beta_indices(pqr_protein)])
data[f"{es_type}_{value_type}_mean"] = np.array(
[np.nanmean([values[x + add][value_type] for x in split if (x + add) in values and value_type in values[x + add]]) for split in
residue_splits])
return data
def pdb2pqr(pdb_file, pqr_file):
"""
Convert pdb file to pqr file
"""
exe = "/mnt/nexenta/durai001/programs/pdb2pqr-2.1.1/pdb2pqr"
command = f"{exe} --ff=amber --apbs-input {pdb_file} {pqr_file}"
subprocess.check_call(command, shell=True)
def apbs_born(pqr_file, output_file, epsilon: int = 80):
"""
Runs born electrostatics
"""
exe = "/mnt/nexenta/durai001/programs/APBS-1.5/share/apbs/tools/bin/born"
command = f"{exe} -v -f {epsilon} {pqr_file} > {output_file}"
subprocess.check_call(command, shell=True)
def apbs_coulomb(pqr_file, output_file):
"""
Runs coulomb electrostatics
"""
exe = "/mnt/nexenta/durai001/programs/APBS-1.5/share/apbs/tools/bin/coulomb"
command = f"{exe} -e -f {pqr_file} > {output_file}"
subprocess.check_call(command, shell=True)
def parse_electrostatics_file(filename) -> tuple:
"""
Parse file returned by running apbs_born or apbs_coulomb
Parameters
----------
filename
Returns
-------
(dict(atom_number: dict(value_type: value)), set(value_types))
"""
values = {}
value_types = set()
with open(filename) as f:
for i, line in enumerate(f):
if i < 10:
continue
if "Atom" not in line:
break
line = line.lstrip().rstrip()
atom_number = int(line.split(":")[0][5:])
value = float(line.split("=")[1].split()[0])
value_type = line.split(":")[1].split("=")[0].lstrip().rstrip()
value_types.add(value_type)
if atom_number not in values:
values[atom_number] = {}
values[atom_number][value_type] = value
return values, value_types
......@@ -3,7 +3,6 @@ import typing
from dataclasses import dataclass, field
from pathlib import Path
import matplotlib.pyplot as plt
import numba as nb
import numpy as np
import prody as pd
......@@ -166,6 +165,20 @@ class Structure:
features = feature_extraction.get_features(str(pdb_file), str(dssp_dir), only_dssp=only_dssp, force_overwrite=force_overwrite)
return cls(pdb_name, pdb_file, sequence, helper.secondary_to_array(features["secondary"]), features, coordinates)
@classmethod
def from_pdb_id(cls, pdb_name: str, chain: str, dssp_dir="caretta_tmp",
extract_all_features=True, force_overwrite=False):
pdb = pd.parsePDB(pdb_name).select("protein").select(f"chain {chain}")
pdb_file = pd.writePDB(pdb_name, pdb)
alpha_indices = helper.get_alpha_indices(pdb)
sequence = pdb[alpha_indices].getSequence()
coordinates = pdb[alpha_indices].getCoords().astype(np.float64)
only_dssp = (not extract_all_features)
features = feature_extraction.get_features(str(Path(pdb_file)), str(dssp_dir), only_dssp=only_dssp,
force_overwrite=force_overwrite)
return cls(pdb_name, pdb_file, sequence, helper.secondary_to_array(features["secondary"]), features,
coordinates)
@dataclass
class OutputFiles:
......@@ -237,7 +250,7 @@ class StructureMultiple:
pdb_files = parse_pdb_files(input_pdb)
if not Path(dssp_dir).exists():
Path(dssp_dir).mkdir()
pdbs = [pd.parsePDB(filename) for filename in pdb_files]
pdbs = [pd.parsePDB(filename).select("protein") for filename in pdb_files]
alpha_indices = [helper.get_alpha_indices(pdb) for pdb in pdbs]
sequences = [pdbs[i][alpha_indices[i]].getSequence() for i in range(len(pdbs))]
coordinates = [np.hstack((pdbs[i][alpha_indices[i]].getCoords().astype(np.float64), np.zeros((len(sequences[i]), 1)) + consensus_weight))
......@@ -548,33 +561,22 @@ class StructureMultiple:
if not output_pdb_folder.exists():
output_pdb_folder.mkdir()
reference_name = self.structures[0].name
reference_pdb = pd.parsePDB(str(self.structures[0].pdb_file))
reference_pdb = pd.parsePDB(self.structures[0].pdb_file)
core_indices = np.array([i for i in range(len(alignments[reference_name])) if '-' not in [alignments[n][i] for n in alignments]])
aln_ref = helper.aligned_string_to_array(alignments[reference_name])
ref_coords_core = reference_pdb[helper.get_alpha_indices(reference_pdb)].getCoords().astype(np.float64)[
np.array([aln_ref[c] for c in core_indices])]
ref_centroid = rmsd_calculations.nb_mean_axis_0(ref_coords_core)
ref_coords_core -= ref_centroid
transformation = pd.Transformation(np.eye(3), -ref_centroid)
reference_pdb = pd.applyTransformation(transformation, reference_pdb)
# reference_pdb.setCoords(reference_pdb.getCoords().astype(np.float64) - ref_centroid)
pd.writePDB(str(output_pdb_folder / f"{reference_name}.pdb"), reference_pdb)
pdb_ref_2 = pd.parsePDB(str(output_pdb_folder / f"{reference_name}.pdb"))
plt.scatter(pdb_ref_2.getCoords()[:, 0], pdb_ref_2.getCoords()[:, 1], alpha=0.3)
# plt.show()
for i in range(1, len(self.structures)):
name = self.structures[i].name
pdb = pd.parsePDB(str(self.structures[i].pdb_file))
pdb = pd.parsePDB(self.structures[i].pdb_file)
aln_name = helper.aligned_string_to_array(alignments[name])
common_coords_2 = pdb[helper.get_alpha_indices(pdb)].getCoords().astype(np.float64)[np.array([aln_name[c] for c in core_indices])]
rotation_matrix, translation_matrix = rmsd_calculations.svd_superimpose(ref_coords_core, common_coords_2)