Spaces:

AtomBio
/

AptaBLE

Sleeping

App Files Files Community

AtomBio committed 4 days ago

Commit

d3248a6

•

1 Parent(s): 71b9472

Create gui.py

Browse files

Files changed (1) hide show

gui.py +109 -0

gui.py ADDED Viewed

	@@ -0,0 +1,109 @@

+from api_prediction import AptaTransPipeline_Dist
+import gradio as gr
+import pandas as pd
+import torch
+import tempfile
+from tabulate import tabulate
+from PIL import Image
+import itertools
+import os
+import RNA
+import matplotlib.pyplot as plt
+import matplotlib.image as mpimg
+import random
+from scipy.cluster.hierarchy import dendrogram, linkage
+# Visualization
+from Bio.Phylo.PhyloXML import Phylogeny
+from Bio import SeqIO
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+from Bio import AlignIO
+from Bio.Align.Applications import MafftCommandline
+from Bio import Phylo
+from Bio.Phylo.TreeConstruction import DistanceCalculator, DistanceTreeConstructor
+import io
# Gradio reads GRADIO_SERVER_NAME as the default host to bind to; 0.0.0.0
# listens on all network interfaces.
os.environ['GRADIO_SERVER_NAME'] = '0.0.0.0'

# Text shown in the web UI.
title = 'DNAptaESM2 Model Inference'
desc = (
    'AptaBLE (cross-attention network), trained to predict the likelihood a DNA aptamer will form a complex with a target protein!'
    '\n\n'
    'Pass in a FASTA-formatted file of all aptamers and input your protein target amino acid sequence. '
    'Your output scores are available for download via an Excel file.'
)

# Module-level pipeline shared by the request handlers defined below.
# Several arguments are passed as None; presumably they only matter for
# training -- TODO confirm against api_prediction.AptaTransPipeline_Dist.
pipeline = AptaTransPipeline_Dist(
    lr=1e-6,
    weight_decay=None,
    epochs=None,
    model_type=None,
    model_version=None,
    model_save_path=None,
    accelerate_save_path=None,
    tensorboard_logdir=None,
    d_model=128,
    d_ff=512,
    n_layers=6,
    n_heads=8,
    dropout=0.1,
    load_best_pt=True,  # loads the pretrained model at construction time
    device='cuda',
    seed=1004)
def comparison(protein, aptamer_file, analysis=None):
    """Score every aptamer in a FASTA file against one protein sequence.

    Args:
        protein: Amino-acid sequence of the target protein.
        aptamer_file: Path to a FASTA file containing the aptamers.
        analysis: Optional value that is only echoed to stdout. It defaults
            to ``None`` so the function can be called with just the protein
            and the FASTA path.

    Returns:
        A ``(text, excel_path)`` tuple. ``text`` is always an empty string;
        ``excel_path`` is the path of a temporary ``.xlsx`` file with one
        row per aptamer (names, sequences and score).
    """
    print('analysis: ', analysis)
    r_names, aptamers = read_fasta(aptamer_file)
    # The same protein sequence is paired with every aptamer.
    proteins = [protein] * len(aptamers)
    scores = get_scores(aptamers, proteins)

    df = pd.DataFrame({
        'Protein': ['protein_prov.'] * len(aptamers),
        'Protein Seq': proteins,
        'Aptamer': r_names,
        'Aptamer Seq': aptamers,
    })
    df['Score'] = scores

    # Reserve a path and close the handle before writing: re-opening a
    # NamedTemporaryFile by name while it is still open is not portable.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
        temp_file_path = temp_file.name
    with pd.ExcelWriter(temp_file_path, engine='openpyxl') as writer:
        df.to_excel(writer, index=False)

    print('Saving to excel!')
    # NOTE(review): second copy written next to the input file (this one
    # includes the index column). Nothing in the visible code reads it --
    # confirm whether it is still needed.
    df.to_excel(f'{aptamer_file}.xlsx')
    torch.cuda.empty_cache()
    return '', temp_file_path
def read_fasta(file_path):
    """Read FASTA records from ``file_path``.

    Blank lines are ignored and a sequence may span several lines; its lines
    are concatenated. Text that appears before the first header is ignored,
    and a header that has no sequence lines is dropped so that the two
    returned lists always stay aligned.

    Args:
        file_path: Path of the FASTA file to read.

    Returns:
        A ``(headers, sequences)`` tuple of equally long lists. Each header
        keeps its leading ``>``; each sequence is a single string.
    """
    records = []  # (header, [sequence chunks]) in file order
    with open(file_path, 'r') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith('>'):
                records.append((line, []))
            elif records:
                records[-1][1].append(line)

    headers = []
    sequences = []
    for header, chunks in records:
        if chunks:
            headers.append(header)
            sequences.append(''.join(chunks))
    return headers, sequences
def get_scores(aptamers, proteins):
    """Run the shared pipeline on paired aptamer and protein sequences.

    The model is moved to the GPU for the call and moved back to the CPU
    afterwards, including when inference raises.

    Args:
        aptamers: Aptamer sequences to score.
        proteins: Protein sequences, one per aptamer.

    Returns:
        Whatever ``pipeline.inference`` returns for the given pairs.
    """
    pipeline.model.to('cuda')
    try:
        # Third argument is one 0 per aptamer -- presumably placeholder
        # labels that inference ignores; TODO confirm.
        return pipeline.inference(aptamers, proteins, [0] * len(aptamers))
    finally:
        pipeline.model.to('cpu')
# The two inputs (protein sequence, FASTA upload) are passed to `comparison`
# in order; its returned tuple fills the two outputs in order.
iface = gr.Interface(
    fn=comparison,
    inputs=[
        gr.Textbox(lines=2, placeholder="Protein"),
        gr.File(type="filepath"),
    ],
    outputs=[
        gr.Textbox(placeholder="Scores"),
        gr.File(label="Download Excel"),
    ],
    title=title,
    description=desc,
)
iface.launch()