Update app.py
app.py CHANGED
@@ -25,7 +25,7 @@ from requests_html import AsyncHTMLSession
 from groq import Groq
 from huggingface_hub import InferenceClient
 
-client = InferenceClient("meta-llama/Meta-Llama-3.1-8B-Instruct")
+# client = InferenceClient("meta-llama/Meta-Llama-3.1-8B-Instruct")
 
 
 # Required for saving the query & response in DB
@@ -38,7 +38,7 @@ logging.basicConfig(level=logging.INFO, format='%(message)s')
 display_ticker=[]
 part = "day"
 
-
+client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
 # client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
 # client = InferenceClient("google/gemma-2-2b-it")
 client_func_call = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
@@ -633,14 +633,14 @@ def generate_final_response(prompt, history):
     logging.info("Total context sent to llm: %s \n\n\n", token_size)
     output=""
     try:
-        for message in client.chat_completion(
-            messages=[{"role": "user", "content": f"{content}"}],
-            max_tokens=500,
-            stream=True,
-        ):
-            stream = message.choices[0].delta.content
-            #
-
+        # for message in client.chat_completion(
+        #     messages=[{"role": "user", "content": f"{content}"}],
+        #     max_tokens=500,
+        #     stream=True,
+        # ):
+        #     stream = message.choices[0].delta.content
+        # Now start the streaming
+        stream = client.text_generation(content, **generate_kwargs, stream=True, details=True, return_full_text=True)
         output = ""
         for response in stream:
             output += response.token.text