rashiqua committed on
Commit
26c54c0
1 Parent(s): 27b459d

Upload 3 files

Browse files
Files changed (3) hide show
  1. DNABERT2-FINAL.py +67 -0
  2. Dockerfile +24 -0
  3. requirements.txt +9 -0
DNABERT2-FINAL.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import streamlit as st
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, BertConfig
4
+ import torch
5
+
6
+
7
# Hugging Face Hub repository id of the fine-tuned checkpoint used by this app.
model_name = "rashiqua/dnabert2_epigenetic"


@st.cache_resource
def _load_model(name):
    """Load the config, tokenizer and classifier for *name* exactly once.

    Streamlit re-executes this script from top to bottom on every widget
    interaction, so loading at plain module level would reload the tokenizer
    and model on each button click. ``st.cache_resource`` keeps the loaded
    objects alive for the lifetime of the server process and hands the same
    instances back on every rerun.

    Args:
        name: Hub repository id (or local path) passed to ``from_pretrained``.

    Returns:
        A ``(config, tokenizer, model)`` tuple.
    """
    loaded_config = BertConfig.from_pretrained(name)
    # NOTE(review): trust_remote_code=True executes Python code shipped in the
    # model repository; only point `name` at repositories you trust.
    loaded_tokenizer = AutoTokenizer.from_pretrained(name, trust_remote_code=True)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(
        name, trust_remote_code=True, config=loaded_config
    )
    return loaded_config, loaded_tokenizer, loaded_model


# Keep the original module-level names so the rest of the script is unaffected.
config, tokenizer, model = _load_model(model_name)
17
+
18
def main():
    """Render the Streamlit page and classify the sequence the user submits.

    The page shows a title, an "About" sidebar, a text area for the DNA
    sequence and a "Classify Sequence" button. When the button is pressed the
    input is normalised and passed to ``pred``; the predicted class and the
    probability of each class are then displayed. If no sequence remains
    after normalisation, a warning is shown instead.
    """
    st.title("Epigenetic Marks Prediction")
    st.write("An application of DNA BERT2")

    st.sidebar.header("About")
    st.sidebar.write("This app uses DNA BERT2 to predict the presence of epigenetic marks in a given DNA sequence.")

    user_input = st.text_area("Enter a DNA sequence:", height=150)

    if not st.button("Classify Sequence"):
        return

    # Pasted sequences frequently contain line breaks or spaces; remove all
    # whitespace so the tokenizer sees one contiguous sequence, and so that
    # whitespace-only input is treated as empty rather than classified.
    # Upper-casing presumes the tokenizer vocabulary is upper-case A/C/G/T
    # -- TODO confirm against the checkpoint's tokenizer.
    sequence = "".join((user_input or "").split()).upper()
    if not sequence:
        st.warning("Please enter a DNA sequence for classification.")
        return

    # `confidence` is the probability pred() assigns to class 1.
    predicted_class, confidence = pred(sequence)

    st.subheader("Prediction Result")
    if predicted_class == 1:
        st.success("Epigenetic Mark detected!")
    else:
        st.info("No epigenetic mark found.")

    st.subheader("Class Distribution")
    st.write("1 - Epigenetic mark found")
    st.progress(confidence)
    st.text(f"{confidence * 100:.2f}%")

    st.write("0 - Epigenetic mark not found")
    st.progress(1 - confidence)
    st.text(f"{(1 - confidence) * 100:.2f}%")
52
+
53
+
54
def pred(sequence):
    """Classify a single DNA sequence with the module-level model.

    Args:
        sequence: The sequence text handed to the module-level ``tokenizer``.

    Returns:
        A ``(predicted_class, confidence)`` tuple, where ``predicted_class``
        is the index of the largest logit and ``confidence`` is the softmax
        probability of class 1 for the first (only) item in the batch.
    """
    tokens = tokenizer(sequence, return_tensors='pt')

    # Inference only: no gradients are needed.
    with torch.no_grad():
        model_output = model(
            input_ids=tokens['input_ids'],
            attention_mask=tokens['attention_mask'],
        )
        scores = model_output[0]  # first element of the output holds the logits
        label = scores.argmax(-1).item()
        class_one_probability = scores.softmax(dim=-1)[0, 1].item()

    return label, class_one_probability
64
+
65
+
66
# Script entry point: build the page when this file is executed directly.
if __name__ == "__main__":
    main()
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.12

# Create and switch to an unprivileged user (uid 1000); everything below,
# including the application itself, runs as this user.
RUN useradd -m -u 1000 user

USER user

# ~/.local/bin is where `pip install --user` places console scripts such as
# `streamlit`, so it must be on PATH for the CMD below to resolve.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

RUN pip install --no-cache-dir --upgrade pip

# Copy only the dependency manifest first so the (slow) install layer is
# reused from the build cache whenever just the application code changes.
COPY --chown=user requirements.txt $HOME/app/requirements.txt

# Install and then remove triton within the same layer: uninstalling in a
# later layer would leave the package's files in the image.
# NOTE(review): triton is presumably removed so the model's remote code falls
# back to its non-Triton code path -- confirm before changing.
RUN pip install --no-cache-dir --user -r requirements.txt \
    && pip uninstall -y triton

COPY --chown=user . $HOME/app

# Pre-create the Hugging Face cache directory. It is created by, and therefore
# owned by, `user`, so no world-writable permissions are required.
RUN mkdir -p $HOME/.cache/huggingface

EXPOSE 7860

CMD ["streamlit", "run", "DNABERT2-FINAL.py", "--server.port=7860", "--server.address=0.0.0.0"]
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ einops
2
+ transformers
3
+ peft
4
+ omegaconf
5
+ torch
6
+ evaluate
7
+ accelerate
8
+ streamlit
9
+