saptharishi committed on
Commit
34cb636
1 Parent(s): ab2b55a

Upload 9 files

Browse files
Files changed (10) hide show
  1. .gitattributes +1 -0
  2. Ingest.py +18 -0
  3. README.md +6 -6
  4. app.py +121 -0
  5. attorney.svg +2 -0
  6. ipc_vector_db/index.faiss +3 -0
  7. ipc_vector_db/index.pkl +3 -0
  8. logo.png +0 -0
  9. requirements.txt +11 -0
  10. user.svg +5 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ ipc_vector_db/index.faiss filter=lfs diff=lfs merge=lfs -text
Ingest.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Ingest.py -- builds the FAISS vector index that app.py loads at start-up.
#
# Pipeline: load every PDF directly under ``data/`` -> split into overlapping
# chunks -> embed each chunk -> write the index to ``ipc_vector_db/``.
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
# Imported from langchain_community (not the deprecated ``langchain.embeddings``
# shim) so both scripts in this project resolve the same class.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

# The query-time code in app.py must embed with exactly this model and
# revision, otherwise its vectors are not comparable with the stored ones.
EMBEDDING_MODEL = "nomic-ai/nomic-embed-text-v1"
EMBEDDING_REVISION = "289f532e14dbbbd5a04753fa58739e9ba766f3c7"

loader = DirectoryLoader('data', glob="./*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()

# 1024-character chunks with a 200-character overlap, so text that straddles
# a chunk boundary is still retrievable from at least one chunk.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

# Fail fast with a readable message: an empty corpus would otherwise surface
# as an obscure error from inside the vector-store construction below.
if not texts:
    raise SystemExit("No text could be extracted from PDFs under 'data/'; nothing to index.")

embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL,
    # trust_remote_code executes code shipped with the model repository;
    # pinning the revision keeps that code from changing underneath us.
    model_kwargs={"trust_remote_code": True, "revision": EMBEDDING_REVISION},
)

# Create the vector embeddings and store them in an in-memory FAISS index.
faiss_db = FAISS.from_documents(texts, embeddings)

# Save the vector database to disk so app.py can load it.
faiss_db.save_local("ipc_vector_db")
README.md CHANGED
@@ -1,12 +1,12 @@
1
  ---
2
- title: Aibot
3
- emoji: 📈
4
- colorFrom: indigo
5
- colorTo: green
6
  sdk: streamlit
7
- sdk_version: 1.32.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: LawGPT - RAG based AI Attorney Chatbot
3
+ emoji: ⚖️
4
+ colorFrom: red
5
+ colorTo: pink
6
  sdk: streamlit
7
+ sdk_version: 1.31.1
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.vectorstores import FAISS
2
+ from langchain_community.embeddings import HuggingFaceEmbeddings
3
+ from langchain.prompts import PromptTemplate
4
+ from langchain_together import Together
5
+ import os
6
+ from langchain.memory import ConversationBufferWindowMemory
7
+ from langchain.chains import ConversationalRetrievalChain
8
+ import streamlit as st
9
+ import time
10
+
11
# --- Page chrome ------------------------------------------------------------
st.set_page_config(page_title="AttroneyGPT")

# Three columns in a 1:8:1 ratio; only the wide middle one is used, so the
# logo ends up horizontally centred.
col1, col2, col3 = st.columns([1,8,1])
with col2:
    st.image("logo.png")

# Custom CSS: restyles inputs and buttons and hides Streamlit's own chrome
# (main menu, deploy button, footer, decoration bar, status-widget button,
# image "View fullscreen" button).
#
# The original stylesheet had an orphan declaration, `margin-top: 0 !important;`,
# sitting between two rules. A CSS parser treats everything up to the next `{`
# as a selector, so the orphan was merged into the following rule's selector,
# made it invalid, and silently dropped the `div.stButton > button:first-child`
# rule. The orphan is removed here so that rule applies again; if a zero top
# margin is wanted, add it inside a rule with an explicit selector.
st.markdown(
    """
<style>
div[data-baseweb="input"] input {
border-color: #000000;
}
div.stButton > button:first-child {
background-color: #808080;
color:white;
}
div.stButton > button:active {
background-color: #808080;
color : white;
}

div[data-testid="stStatusWidget"] div button {
display: none;
}

.reportview-container {
margin-top: -2em;
}
#MainMenu {visibility: hidden;}
.stDeployButton {display:none;}
footer {visibility: hidden;}
#stDecoration {display:none;}
button[title="View fullscreen"]{
visibility: hidden;}
</style>
""",
    unsafe_allow_html=True,
)
49
+
50
def reset_conversation():
    """Button callback: drop the visible transcript and clear the chain's memory."""
    st.session_state["messages"] = []
    st.session_state["memory"].clear()


# Seed per-session state on the first run only; on later reruns the keys are
# already present and are left untouched.
#   messages -> list of {"role", "content", ...} dicts replayed into the chat UI
#   memory   -> window memory (k=2) exposed to the chain as "chat_history"
_SESSION_DEFAULTS = {
    "messages": list,
    "memory": lambda: ConversationBufferWindowMemory(
        k=2,
        memory_key="chat_history",
        return_messages=True,
    ),
}
for _key, _factory in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _factory()
59
+
60
@st.cache_resource
def _load_vector_store():
    """Load the embedding model and the on-disk FAISS index.

    Streamlit re-executes this script on every interaction. Wrapping the load
    in ``st.cache_resource`` means the model and index are built once and then
    reused, instead of being re-created for every chat message.

    Returns:
        The FAISS vector store read from the ``ipc_vector_db`` directory.
    """
    # Same model and pinned revision as Ingest.py, so query vectors are
    # comparable with the stored document vectors.
    embeddings = HuggingFaceEmbeddings(
        model_name="nomic-ai/nomic-embed-text-v1",
        model_kwargs={
            "trust_remote_code": True,
            "revision": "289f532e14dbbbd5a04753fa58739e9ba766f3c7",
        },
    )
    # SECURITY: allow_dangerous_deserialization=True unpickles index.pkl, and
    # unpickling can execute arbitrary code. Only acceptable because the index
    # is produced by this project's own Ingest.py -- never point this at an
    # index obtained from an untrusted source.
    return FAISS.load_local("ipc_vector_db", embeddings, allow_dangerous_deserialization=True)


db = _load_vector_store()
# Plain similarity search returning the 4 closest chunks per question.
db_retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})

# NOTE(review): the template closes with "</s>[INST]" rather than the
# "[/INST]" marker that Mistral instruct prompts normally use -- confirm
# against the model card before changing; the text is left as-is here.
prompt_template = """<s>[INST]This is a chat template and As a legal chat bot specializing in Sericultural related Queries!!.
CONTEXT: {context}
CHAT HISTORY: {chat_history}
QUESTION: {question}
ANSWER:
</s>[INST]
"""

prompt = PromptTemplate(
    template=prompt_template,
    input_variables=['context', 'question', 'chat_history'],
)

# You can also use other LLM options from https://python.langchain.com/docs/integrations/llms.
# This app uses the Together AI API.
#
# SECURITY: the API key was previously hard-coded on this line and committed
# with the file. It is now read from the TOGETHER_AI environment variable
# (e.g. a Space secret). The key that was committed must be treated as leaked
# and revoked.
TOGETHER_AI_API = os.environ.get("TOGETHER_AI")
if not TOGETHER_AI_API:
    st.error(
        "The TOGETHER_AI environment variable is not set. "
        "Add your Together AI API key as a secret and restart the app."
    )
    st.stop()

llm = Together(
    model="mistralai/Mistral-7B-Instruct-v0.2",
    temperature=0.5,
    max_tokens=1024,
    together_api_key=TOGETHER_AI_API,
)

# The chain is rebuilt on each run but bound to the per-session memory object,
# so conversation history survives across reruns.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    memory=st.session_state.memory,
    retriever=db_retriever,
    combine_docs_chain_kwargs={'prompt': prompt},
)
90
+
91
# Replay the stored transcript so earlier turns stay visible after a rerun.
for message in st.session_state.messages:
    role = message.get("role")
    content = message.get("content")

    # Draw replayed bubbles exactly like the live ones further down: the user
    # gets the user.svg avatar, the bot gets a plain "bot" bubble. The previous
    # code passed the literal string "bot" as the avatar, which is neither an
    # emoji nor an image path/URL -- the string forms the avatar argument
    # accepts.
    if role == "human":
        bubble = st.chat_message("human", avatar="user.svg")
    else:
        bubble = st.chat_message("bot")
    with bubble:
        st.write(content)

input_prompt = st.chat_input("message LAWGpt.....")

if input_prompt:
    with st.chat_message("human", avatar="user.svg"):
        st.write(input_prompt)

    st.session_state.messages.append({"role": "human", "content": input_prompt})

    with st.chat_message("bot"):
        with st.spinner("Thinking..."):
            result = qa.invoke(input=input_prompt)

        message_placeholder = st.empty()

        # Every answer is prefixed with a fixed disclaimer, then "typed out"
        # one character at a time with a trailing cursor glyph.
        full_response = "⚠️ **_Note: This offers basic legal advice and is not a complete substitute for consulting a human attorney_** \n\n\n"
        for chunk in result["answer"]:
            full_response += chunk
            time.sleep(0.02)

            message_placeholder.markdown(full_response + " ▌")
        # Final render without the cursor; previously the "▌" stayed on screen
        # after the answer had finished streaming.
        message_placeholder.markdown(full_response)

        st.button('Reset All Chat 🗑️', on_click=reset_conversation)

    # Only the raw answer (without the disclaimer) is stored for replay.
    st.session_state.messages.append({"role": "ai", "content": result["answer"], "avatar": "bot"})
+
attorney.svg ADDED
ipc_vector_db/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cac55439623c498f6c66b3effc3672a8a06236532be126130c3c14b117a8e92b
3
+ size 1090605
ipc_vector_db/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:185276a728ae14de633abfb4299490fd74d4e8eb78df96527500407234c6f5f7
3
+ size 321925
logo.png ADDED
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
langchain
# Imported directly by Ingest.py and app.py (langchain_community.*), so it is
# declared explicitly rather than relied on as a transitive dependency.
langchain-community
pypdf
transformers
sentence-transformers
accelerate
faiss-cpu
streamlit
langchain-fireworks
einops
langchain_together
11
+
user.svg ADDED