import streamlit as st from transformers import AutoTokenizer, AutoModelForSequenceClassification import torch from transformers import AutoConfig model_name = "farhan2206/dnabert2fourth" # Load the tokenizer and model # Load the configuration associated with the model config = AutoConfig.from_pretrained(model_name, trust_remote_code=True) # Load the model using the correct configuration tokenizer = AutoTokenizer.from_pretrained(model_name, config=config, trust_remote_code=True) model = AutoModelForSequenceClassification.from_pretrained(model_name, config=config, trust_remote_code=True) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model = model.to(device) # Streamlit UI def main(): st.title("Epigenetic Marks Prediction") st.write("An application of DNA BERT2") # Sidebar with information st.sidebar.header("About") st.sidebar.write("This app uses DNA BERT2 to predict the presence of epigenetic marks in a given DNA sequence.") # User input user_input = st.text_area("Enter a DNA sequence:", height=150) # Predict when the user provides input if st.button("Classify Sequence"): if user_input: # Call the pred function for prediction # predicted_class, confidence = pred(user_input) predicted_class = pred(user_input) # Display the result st.subheader("Prediction Result") if predicted_class == 1: st.success("Epigenetic Mark detected!") else: st.info("No epigenetic mark found.") # # Display progress bars with percentages # st.subheader("Class Distribution") # st.write("1 - Epigenetic mark found") # st.progress(confidence) # st.text(f"{confidence * 100:.2f}%") # st.write("0 - Epigenetic mark not found") # st.progress(1 - confidence) # st.text(f"{(1 - confidence) * 100:.2f}%") else: st.warning("Please enter a DNA sequence for classification.") # Function for prediction # def pred(sequence): # encoded_input = tokenizer(sequence, return_tensors='pt') # # Pass the encoded input through the model # with torch.no_grad(): # outputs = model(input_ids=encoded_input['input_ids'], attention_mask=encoded_input['attention_mask']) # logits = outputs[0] # predicted_class = logits.argmax(-1).item() # confidence = logits.softmax(dim=-1)[0, 1].item() # return predicted_class, confidence def pred(sequence): # Move the input tensors to the GPU encoded_input = tokenizer(sequence, return_tensors='pt').to(device) # Pass the encoded input through the model with torch.no_grad(): outputs = model(input_ids=encoded_input['input_ids'], attention_mask=encoded_input['attention_mask']).to(device) logits = outputs[0] predicted_class = logits.argmax(-1).item() #confidence = logits.softmax(dim=-1)[0, 1].item() return predicted_class #, confidence if __name__ == "__main__": main() # streamlit run app.py --server.port 9000