|
import pandas as pd |
|
from biopandas.pdb import PandasPdb |
|
from prody import parsePDBHeader |
|
|
|
|
|
|
|
|
|
def read_pdb_to_dataframe(
    pdb_path,
    model_index: int = 1,
    parse_header: bool = True,
) -> tuple:
    """
    Read a PDB file and return its atom records together with the parsed header.

    Args:
        pdb_path (str): Path to a local PDB file to read. Required.
        model_index (int, optional): Index of the model to extract from the PDB file, in case
            it contains multiple models. Defaults to 1.
        parse_header (bool, optional): Whether to parse the PDB header with
            ``parsePDBHeader``. Defaults to True.

    Returns:
        tuple: ``(atoms, header)``. ``atoms`` is a pd.DataFrame holding the ATOM rows
            followed by the HETATM rows of the selected model, one row per atom.
            ``header`` is whatever ``parsePDBHeader`` returned, or None when
            ``parse_header`` is False.

    Raises:
        ValueError: If the selected model contains no ATOM records.
    """
    ppdb = PandasPdb().read_pdb(pdb_path)
    header = parsePDBHeader(pdb_path) if parse_header else None

    model = ppdb.get_model(model_index)
    # Only the ATOM table is checked: a model with no ATOM rows is treated as missing.
    if model.df["ATOM"].empty:
        raise ValueError(f"No model found for index: {model_index}")

    return pd.concat([model.df["ATOM"], model.df["HETATM"]]), header
|
|
|
from graphein.protein.graphs import label_node_id |
|
|
|
def process_dataframe(df: pd.DataFrame, granularity='CA') -> pd.DataFrame:
    """
    Process a DataFrame of protein structure data to reduce ambiguity and simplify analysis.

    This function performs the following steps:
    1. Resolves alternate locations: rows whose 'alt_loc' is blank or 'A' are kept
       (blank is normalised to 'A'); rows with any other alternate location are dropped.
    2. Passes the result through the helper label_node_id to label the rows.
    3. Keeps only the rows whose 'atom_name' equals `granularity`.

    The input DataFrame is never modified; all work happens on a copy.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame containing protein structure data to process. It must contain the
        column 'atom_name'; 'alt_loc' is used when present.

    granularity : str, optional
        Atom name to keep, also forwarded to label_node_id. Defaults to 'CA' (alpha carbon).

    Returns
    -------
    pd.DataFrame
        The filtered rows of the labelled DataFrame.
    """
    if 'alt_loc' in df.columns:
        # assign() builds a new frame, so the caller's 'alt_loc' column is left untouched.
        df = df.assign(alt_loc=df['alt_loc'].replace('', 'A'))
        # Explicit copy: a bare .loc slice would trigger SettingWithCopyWarning if
        # label_node_id assigns columns on it.
        df = df.loc[df['alt_loc'] == 'A'].copy()
    else:
        # NOTE(review): label_node_id may add columns in place; copy so the caller's
        # frame is protected in this branch as well.
        df = df.copy()

    df = label_node_id(df, granularity)
    return df.loc[df['atom_name'] == granularity]
|
|
|
|
|
from graphein.protein.graphs import initialise_graph_with_metadata |
|
from graphein.protein.graphs import add_nodes_to_graph |
|
from graphein.protein.visualisation import plotly_protein_structure_graph |
|
from PIL import Image |
|
import networkx as nx |
|
|
|
def _add_backbone_edges(G: "nx.Graph") -> "nx.Graph":
    """Connect consecutive residues of each chain with a 'backbone_bond' edge.

    For every chain id in ``G.graph["chain_ids"]`` the nodes of that chain are taken
    in graph iteration order; two neighbouring nodes are linked when their
    'residue_number' attributes differ by exactly 1. The graph is modified in place
    and also returned.
    """
    for chain_id in G.graph["chain_ids"]:
        chain_residues = [
            (n, v) for n, v in G.nodes(data=True) if v["chain_id"] == chain_id
        ]

        # Walk neighbouring pairs; both nodes share chain_id by construction.
        for (node_a, attrs_a), (node_b, attrs_b) in zip(chain_residues, chain_residues[1:]):
            if abs(attrs_a["residue_number"] - attrs_b["residue_number"]) != 1:
                continue

            # Look the edge up by node id (not list position) so an existing edge
            # keeps its other kinds instead of being overwritten.
            if G.has_edge(node_a, node_b):
                G.edges[node_a, node_b].setdefault("kind", set()).add('backbone_bond')
            else:
                G.add_edge(node_a, node_b, kind={'backbone_bond'})
    return G


def take_care(pdb_path, pdb_code='3nir', image_file="protein_graph.png"):
    """Build a CA-level backbone graph from a PDB file and render it to a PNG image.

    Args:
        pdb_path: Path to the local PDB file to read.
        pdb_code (str, optional): PDB code stored in the graph metadata. Defaults to
            '3nir', the value that used to be hard-coded.
        image_file (str, optional): Path the rendered PNG is written to. Defaults to
            "protein_graph.png".

    Returns:
        PIL.Image.Image: The rendered figure, fully loaded into memory.
    """
    # The header is not used here, so skip parsing it.
    df, _ = read_pdb_to_dataframe(pdb_path, parse_header=False)
    process_df = process_dataframe(df)

    g = initialise_graph_with_metadata(
        protein_df=process_df,
        raw_pdb_df=df,
        pdb_code=pdb_code,
        granularity='CA',
    )
    g = add_nodes_to_graph(g)
    g = _add_backbone_edges(g)

    p = plotly_protein_structure_graph(
        g,
        colour_edges_by="kind",
        colour_nodes_by="seq_position",
        label_node_ids=False,
        plot_title="Backbone Protein Graph",
        node_size_multiplier=1,
    )
    p.write_image(image_file, format='png')

    image = Image.open(image_file)
    # Image.open is lazy; load now so the returned image no longer depends on the
    # file, which a later call may overwrite.
    image.load()
    return image