Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import spaces
|
2 |
import datetime
|
|
|
3 |
import urllib.parse
|
4 |
import json
|
5 |
import asyncio
|
@@ -23,7 +24,7 @@ moneycontrol_news = ""
|
|
23 |
earning_stocks = ""
|
24 |
part = "day"
|
25 |
|
26 |
-
|
27 |
|
28 |
client = InferenceClient(
|
29 |
"mistralai/Mistral-7B-Instruct-v0.3"
|
@@ -67,7 +68,7 @@ def latest_earning():
|
|
67 |
|
68 |
def todays_news():
|
69 |
url = 'https://trendlyne.com/markets-today/'
|
70 |
-
|
71 |
# Fetch the HTML content of the webpage
|
72 |
html_content = requests.get(url).text
|
73 |
soup = BeautifulSoup(html_content, 'html.parser')
|
@@ -89,8 +90,8 @@ def todays_news():
|
|
89 |
notification.append(insight.find(class_='insight-notification').text.strip())
|
90 |
|
91 |
df = pd.DataFrame({"Timestamp": timestamps, "Stock": stock_names, "Label": insight_label, "Notification": notification})
|
92 |
-
|
93 |
-
|
94 |
df_dict = df.to_dict('records')
|
95 |
return df_dict
|
96 |
|
@@ -115,8 +116,8 @@ def get_moneycontrol_news():
|
|
115 |
linkx.append(a_tag['href'])
|
116 |
filtered_links = list(set([link for link in linkx if link.endswith('.html')]))
|
117 |
else:
|
118 |
-
# If the request was not successful,
|
119 |
-
|
120 |
return
|
121 |
|
122 |
common_content = ""
|
@@ -137,7 +138,7 @@ def get_moneycontrol_news():
|
|
137 |
split_author = split_article.split('author', 1)[0]
|
138 |
heading = "Heading -" + link.split('/')[-1].replace("-", " ")
|
139 |
body = "Body -" + re.sub('[:\n-\";]|amp', ' ', split_author)
|
140 |
-
|
141 |
common_content = common_content + str({heading: body}) + "," + "\n"
|
142 |
|
143 |
today = datetime.date.today()
|
@@ -204,7 +205,7 @@ def get_the_ticker(stock_name):
|
|
204 |
raw_query = f"YAHOO FINANCE TICKER SYMBOL OF {stock.upper()}"
|
205 |
query = raw_query.replace(" ", "+")
|
206 |
url = f'https://www.google.com/search?q={query}&FORM=HDRSC7'
|
207 |
-
|
208 |
# Fetch the HTML content of the webpage
|
209 |
html_content = requests.get(url).text # Fix: Added .text to access the response text
|
210 |
soup = BeautifulSoup(html_content, "html.parser")
|
@@ -213,7 +214,7 @@ def get_the_ticker(stock_name):
|
|
213 |
step1 = [urllib.parse.unquote(i) for i in matches]
|
214 |
matches = [urllib.parse.unquote(i) for i in step1]
|
215 |
matches = list(set(matches[:2]))
|
216 |
-
|
217 |
|
218 |
return matches
|
219 |
|
@@ -221,16 +222,16 @@ def get_the_ticker(stock_name):
|
|
221 |
def get_the_ticker_stat(stock):
|
222 |
combination=[]
|
223 |
url = f'https://www.google.com/search?q={urllib.parse.quote(stock)}+site:businesstoday.in/stocks/&num=1&sca_esv=28795b6719ac1a08&sxsrf=ACQVn08xDA1EP1V6hJ-q4jLjjXSWWxgHTw:1711450545062&source=lnt&tbs=li:1&sa=X&ved=2ahUKEwj426eO4pGFAxX4n2MGHRXqBTUQpwV6BAgBEBM&biw=1280&bih=567&dpr=1.5'
|
224 |
-
|
225 |
# Fetch the HTML content of the webpage
|
226 |
html_content = requests.get(url).text # Fix: Added .text to access the response text
|
227 |
pattern = r'href="/url[?]q=(https://www.businesstoday.in/stocks/[^"]+)"'
|
228 |
# Find all matches using re.findall
|
229 |
links = re.findall(pattern, html_content)
|
230 |
links = list(set(links))
|
231 |
-
|
232 |
url = (links[0].split("&"))[0]
|
233 |
-
|
234 |
# Fetch the HTML content of the webpage
|
235 |
html_content = requests.get(url).text
|
236 |
soup = BeautifulSoup(html_content, "html.parser")
|
@@ -238,7 +239,7 @@ def get_the_ticker_stat(stock):
|
|
238 |
|
239 |
# Parse the JSON-LD script
|
240 |
json_data = json.loads(script.text)
|
241 |
-
#
|
242 |
# Iterate over the "mainEntity" array
|
243 |
qa_dict={}
|
244 |
for entity in json_data["mainEntity"]:
|
@@ -246,50 +247,50 @@ def get_the_ticker_stat(stock):
|
|
246 |
question = entity["name"].replace("'", "")
|
247 |
answer = entity["acceptedAnswer"]["text"].replace("'", "")
|
248 |
|
249 |
-
#
|
250 |
qa_dict[question]=answer
|
251 |
|
252 |
combination.append(qa_dict)
|
253 |
-
|
254 |
return(combination)
|
255 |
|
256 |
|
257 |
def get_the_ticker_news(stock):
|
258 |
all_news=[]
|
259 |
url = f'https://www.google.com/search?q={urllib.parse.quote(stock)}+site:trendlyne.com/research-reports&num=3&sca_esv=28795b6719ac1a08&sxsrf=ACQVn08xDA1EP1V6hJ-q4jLjjXSWWxgHTw:1711450545062&source=lnt&tbs=li:1&sa=X&ved=2ahUKEwj426eO4pGFAxX4n2MGHRXqBTUQpwV6BAgBEBM&biw=1280&bih=567&dpr=1.5'
|
260 |
-
#
|
261 |
# Fetch the HTML content of the webpage
|
262 |
html_content = requests.get(url).text # Fix: Added .text to access the response text
|
263 |
pattern = r'href="/url[?]q=(https://trendlyne.com/research-reports/[^"]+)"'
|
264 |
# Find all matches using re.findall
|
265 |
links = re.findall(pattern, html_content)
|
266 |
links = list(set(links))
|
267 |
-
|
268 |
if "/%" in links[0]:
|
269 |
fetched_reports_url = links[0].split("%")[0]
|
270 |
else:
|
271 |
fetched_reports_url = links[0].split("&")[0]
|
272 |
# fetched url may look like this - https://trendlyne.com/research-reports/post/ROLTA/1146/rolta-india-ltd/
|
273 |
-
|
274 |
pattern = '\/\/.*?(\d+).*\/'
|
275 |
match = re.search(pattern, fetched_reports_url)
|
276 |
if match:
|
277 |
-
|
278 |
split_url = fetched_reports_url.split("/")
|
279 |
-
|
280 |
unique_no = match.group(1)
|
281 |
-
|
282 |
company_name = split_url[-2]
|
283 |
-
|
284 |
reports_url = f"https://trendlyne.com/research-reports/stock/{unique_no}/{urllib.parse.quote(stock)}/{company_name}/"
|
285 |
else:
|
286 |
-
|
287 |
reports_url = fetched_reports_url
|
288 |
financials_url = reports_url.replace("research-reports/stock","equity")
|
289 |
url = reports_url.replace("research-reports/stock","latest-news")
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
|
294 |
# Fetch the HTML content of the webpage
|
295 |
# req = Request(
|
@@ -297,29 +298,29 @@ def get_the_ticker_news(stock):
|
|
297 |
# headers={'User-Agent': 'Mozilla/5.0'}
|
298 |
# )
|
299 |
# webpage = urlopen(req).read()
|
300 |
-
#
|
301 |
html_content = requests.get(url,headers={'User-Agent': 'Mozilla/5.0'}).text
|
302 |
html_content_financials = requests.get(financials_url,headers={'User-Agent': 'Mozilla/5.0'}).text
|
303 |
|
304 |
soup = BeautifulSoup(html_content, "html.parser")
|
305 |
-
#
|
306 |
href = None
|
307 |
a_tags = soup.find_all('a', class_='newslink')
|
308 |
if a_tags is not None:
|
309 |
links = [a_tag["href"] for a_tag in a_tags]
|
310 |
-
|
311 |
|
312 |
fin_soup = BeautifulSoup(html_content_financials, "html.parser")
|
313 |
matches = re.findall(r'data-companyinsights="\[(.*?)\]"', str(fin_soup))
|
314 |
company_insight = matches[0].replace(""","").replace("\\u20b", "").replace("parameter","Metric").replace("insight_color", "Trend").replace("insight_text", "Insight")
|
315 |
all_news.append(company_insight)
|
316 |
-
|
317 |
return all_news, reports_url
|
318 |
|
319 |
|
320 |
def get_google_news(ticker):
|
321 |
url = f"https://www.bing.com/news/search?q=Latest+News+on+{ticker}&form=YFNR&filters=sortbydate%3a%221%22"
|
322 |
-
|
323 |
sequence=[]
|
324 |
heading=[]
|
325 |
content = requests.get(url)
|
@@ -333,7 +334,7 @@ def get_google_news(ticker):
|
|
333 |
|
334 |
df = pd.DataFrame(heading, index=sequence, columns=["News-Headlines"])
|
335 |
df.sort_index()
|
336 |
-
|
337 |
return df["News-Headlines"].head(5).to_numpy()
|
338 |
|
339 |
|
@@ -373,26 +374,26 @@ def scrape_webpage(url):
|
|
373 |
def raw_news(raw_query, todays_news_func_call, ticker):
|
374 |
ticker_stats_str = ''
|
375 |
|
376 |
-
|
377 |
ticker_stats = get_the_ticker_stat(ticker)
|
378 |
|
379 |
-
|
380 |
ticker_financials, reports = get_the_ticker_news(ticker)
|
381 |
|
382 |
-
|
383 |
for ticker_stat in ticker_stats:
|
384 |
ticker_stats_str = ticker_stats_str + json.dumps(ticker_stat).replace("&", "'").replace("'", "'").replace(""", "'").replace("Link", "").replace("Heading", "").replace("Body", "").replace("Text", "").replace("{", "").replace("}", "")
|
385 |
|
386 |
-
|
387 |
|
388 |
-
|
389 |
news_google = get_google_news(ticker)
|
390 |
|
391 |
-
|
392 |
swot_news_link = f'https://widgets.trendlyne.com/web-widget/swot-widget/Poppins/{urllib.parse.quote(ticker)}/'
|
393 |
-
|
394 |
news_data = scrape_webpage(swot_news_link)
|
395 |
-
|
396 |
|
397 |
return news_data, ticker_stats_str, ticker_financials, reports, news_google
|
398 |
|
@@ -427,13 +428,13 @@ def generate_function_call(prompt):
|
|
427 |
stream = client.text_generation(content, **generate_kwargs, stream=True, details=True, return_full_text=True)
|
428 |
for response in stream:
|
429 |
output += response.token.text
|
430 |
-
|
431 |
-
|
432 |
pattern = r'\{[^{}]*\}'
|
433 |
# Find all matches
|
434 |
match = re.search(pattern, output)
|
435 |
extracted_string = match.group(0)
|
436 |
-
|
437 |
output = json.loads(extracted_string)
|
438 |
return output
|
439 |
|
@@ -444,35 +445,35 @@ def generate_final_response(prompt, history, context_files=[], ticker_stats=[],
|
|
444 |
# temperature = 1e-2
|
445 |
# top_p = float(top_p)
|
446 |
global display_ticker
|
447 |
-
|
448 |
generate_kwargs = dict(temperature=0.001,max_new_tokens=1024,top_p=0.95,repetition_penalty=1.0,do_sample=True,seed=42)
|
449 |
today = datetime.date.today()
|
450 |
todays_date = today.strftime('%d%B%Y')
|
451 |
question = format_prompt(prompt, history)
|
452 |
-
|
453 |
|
454 |
chat_completion_params = generate_function_call(question)
|
455 |
-
|
456 |
ticker = []
|
457 |
stock_names = chat_completion_params["stock_name"]
|
458 |
-
|
459 |
ticker = get_the_ticker(stock_names)
|
460 |
-
|
461 |
|
462 |
try:
|
463 |
if (chat_completion_params['todays_news_flag'] or chat_completion_params['generic_query']) and len(ticker)<1:
|
464 |
-
|
465 |
news_link.append(todays_news())
|
466 |
time.sleep(2)
|
467 |
elif chat_completion_params['todays_news_flag'] and len(ticker)>0:
|
468 |
for tick in chat_completion_params["stock_name"]:
|
469 |
news_googles.append(f"Latest News for {tick}\n\n {get_google_news(tick)}")
|
470 |
elif (chat_completion_params['follow_up_query'] and ticker_stats != []) or (display_ticker == ticker and ticker_stats != []):
|
471 |
-
|
472 |
chat_completion_params['follow_up_query'] = True
|
473 |
else:
|
474 |
-
|
475 |
-
|
476 |
for stock in ticker:
|
477 |
context_file, ticker_stat, ticker_financial, report, news_google = raw_news(raw_query=question, todays_news_func_call=chat_completion_params["todays_news_flag"], ticker=stock)
|
478 |
# Append each detail to its corresponding list
|
@@ -483,13 +484,13 @@ def generate_final_response(prompt, history, context_files=[], ticker_stats=[],
|
|
483 |
news_googles.append(news_google)
|
484 |
|
485 |
|
486 |
-
|
487 |
display_ticker = ticker if ticker else [0]
|
488 |
env = Environment(loader=FileSystemLoader("templates/"), autoescape=True)
|
489 |
# env.globals['include'] = lambda filename: env.loader.get_source(env, filename)[0]
|
490 |
template = env.get_template("system_prompt.txt")
|
491 |
content = template.render(todays_date=todays_date,ticker_financials=ticker_financials ,response_type="Response-1",chat_completion_params=chat_completion_params,context_file=context_files, question=question,ticker=ticker, ticker_stats = ticker_stats, reports=reports, news_link=news_link, earnings = earning_link, news_googles=news_googles)
|
492 |
-
#
|
493 |
output=""
|
494 |
try:
|
495 |
# Now start the streaming
|
|
|
1 |
import spaces
|
2 |
import datetime
|
3 |
+
import logging
|
4 |
import urllib.parse
|
5 |
import json
|
6 |
import asyncio
|
|
|
24 |
# Module-level defaults.
earning_stocks = ""
part = "day"

# logging formats records as `msg % args`; an argument without a matching
# placeholder makes the handler report a formatting error instead of the
# message, so the hour is passed through an explicit %s.
logging.info("current hour: %s", datetime.datetime.now().hour)
|
28 |
|
29 |
client = InferenceClient(
|
30 |
"mistralai/Mistral-7B-Instruct-v0.3"
|
|
|
68 |
|
69 |
def todays_news():
|
70 |
url = 'https://trendlyne.com/markets-today/'
|
71 |
+
logging.info("getting news from", url)
|
72 |
# Fetch the HTML content of the webpage
|
73 |
html_content = requests.get(url).text
|
74 |
soup = BeautifulSoup(html_content, 'html.parser')
|
|
|
90 |
notification.append(insight.find(class_='insight-notification').text.strip())
|
91 |
|
92 |
df = pd.DataFrame({"Timestamp": timestamps, "Stock": stock_names, "Label": insight_label, "Notification": notification})
|
93 |
+
logging.info("Dataframe created for stocks in news today")
|
94 |
+
logging.info(df)
|
95 |
df_dict = df.to_dict('records')
|
96 |
return df_dict
|
97 |
|
|
|
116 |
linkx.append(a_tag['href'])
|
117 |
filtered_links = list(set([link for link in linkx if link.endswith('.html')]))
|
118 |
else:
|
119 |
+
# If the request was not successful, log an error message
|
120 |
+
logging.info("Failed to retrieve article links from the moneycontrol")
|
121 |
return
|
122 |
|
123 |
common_content = ""
|
|
|
138 |
split_author = split_article.split('author', 1)[0]
|
139 |
heading = "Heading -" + link.split('/')[-1].replace("-", " ")
|
140 |
body = "Body -" + re.sub('[:\n-\";]|amp', ' ', split_author)
|
141 |
+
logging.info(heading)
|
142 |
common_content = common_content + str({heading: body}) + "," + "\n"
|
143 |
|
144 |
today = datetime.date.today()
|
|
|
205 |
raw_query = f"YAHOO FINANCE TICKER SYMBOL OF {stock.upper()}"
|
206 |
query = raw_query.replace(" ", "+")
|
207 |
url = f'https://www.google.com/search?q={query}&FORM=HDRSC7'
|
208 |
+
logging.info("searching ticker using url: ",url)
|
209 |
# Fetch the HTML content of the webpage
|
210 |
html_content = requests.get(url).text # Fix: Added .text to access the response text
|
211 |
soup = BeautifulSoup(html_content, "html.parser")
|
|
|
214 |
step1 = [urllib.parse.unquote(i) for i in matches]
|
215 |
matches = [urllib.parse.unquote(i) for i in step1]
|
216 |
matches = list(set(matches[:2]))
|
217 |
+
logging.info("List of matches obtained: ", matches)
|
218 |
|
219 |
return matches
|
220 |
|
|
|
222 |
def get_the_ticker_stat(stock):
|
223 |
combination=[]
|
224 |
url = f'https://www.google.com/search?q={urllib.parse.quote(stock)}+site:businesstoday.in/stocks/&num=1&sca_esv=28795b6719ac1a08&sxsrf=ACQVn08xDA1EP1V6hJ-q4jLjjXSWWxgHTw:1711450545062&source=lnt&tbs=li:1&sa=X&ved=2ahUKEwj426eO4pGFAxX4n2MGHRXqBTUQpwV6BAgBEBM&biw=1280&bih=567&dpr=1.5'
|
225 |
+
logging.info("getting ticker stat url from: ", url)
|
226 |
# Fetch the HTML content of the webpage
|
227 |
html_content = requests.get(url).text # Fix: Added .text to access the response text
|
228 |
pattern = r'href="/url[?]q=(https://www.businesstoday.in/stocks/[^"]+)"'
|
229 |
# Find all matches using re.findall
|
230 |
links = re.findall(pattern, html_content)
|
231 |
links = list(set(links))
|
232 |
+
logging.info("List of links obtained for ticker stat: ", links)
|
233 |
url = (links[0].split("&"))[0]
|
234 |
+
logging.info("Final URL to fetch stats", url)
|
235 |
# Fetch the HTML content of the webpage
|
236 |
html_content = requests.get(url).text
|
237 |
soup = BeautifulSoup(html_content, "html.parser")
|
|
|
239 |
|
240 |
# Parse the JSON-LD script
|
241 |
json_data = json.loads(script.text)
|
242 |
+
# logging.info(json_data)
|
243 |
# Iterate over the "mainEntity" array
|
244 |
qa_dict={}
|
245 |
for entity in json_data["mainEntity"]:
|
|
|
247 |
question = entity["name"].replace("'", "")
|
248 |
answer = entity["acceptedAnswer"]["text"].replace("'", "")
|
249 |
|
250 |
+
# Store the answer under its question
|
251 |
qa_dict[question]=answer
|
252 |
|
253 |
combination.append(qa_dict)
|
254 |
+
logging.info("All the stat data fetched: ", combination)
|
255 |
return(combination)
|
256 |
|
257 |
|
258 |
def get_the_ticker_news(stock):
    """Fetch Trendlyne company insights and the research-report URL for a stock.

    A Google search restricted to ``trendlyne.com/research-reports`` is
    scraped for a report link. From that link the stock-level report URL and
    its "equity" and "latest-news" variants are derived, both pages are
    downloaded, and the ``data-companyinsights`` attribute of the equity page
    is extracted and relabelled.

    Args:
        stock: Stock identifier; URL-quoted into the search query and into the
            rebuilt report URL.

    Returns:
        ``(all_news, reports_url)``: ``all_news`` is a one-element list holding
        the cleaned insights string, ``reports_url`` is the report URL.

    Raises:
        IndexError: If the search result contains no report link, or the
            equity page has no ``data-companyinsights`` attribute.
    """
    all_news = []
    url = f'https://www.google.com/search?q={urllib.parse.quote(stock)}+site:trendlyne.com/research-reports&num=3&sca_esv=28795b6719ac1a08&sxsrf=ACQVn08xDA1EP1V6hJ-q4jLjjXSWWxgHTw:1711450545062&source=lnt&tbs=li:1&sa=X&ved=2ahUKEwj426eO4pGFAxX4n2MGHRXqBTUQpwV6BAgBEBM&biw=1280&bih=567&dpr=1.5'
    # Fetch the HTML content of the search result page
    html_content = requests.get(url).text
    pattern = r'href="/url[?]q=(https://trendlyne.com/research-reports/[^"]+)"'
    # De-duplicate the matches; note that set ordering decides which link is
    # treated as "first" below.
    links = list(set(re.findall(pattern, html_content)))
    # Values are passed as lazy %-style arguments: logging formats records as
    # `msg % args`, so an argument without a placeholder is a formatting error.
    logging.info("Links fetched to get trendlyne research report: %s", links)
    if "/%" in links[0]:
        fetched_reports_url = links[0].split("%")[0]
    else:
        fetched_reports_url = links[0].split("&")[0]
    # fetched url may look like this - https://trendlyne.com/research-reports/post/ROLTA/1146/rolta-india-ltd/
    logging.info("finalised url: %s", fetched_reports_url)
    # Raw string: "/" needs no escaping and "\d" must reach the regex engine
    # untouched.
    match = re.search(r'//.*?(\d+).*/', fetched_reports_url)
    if match:
        logging.info("unique number identified in url")
        split_url = fetched_reports_url.split("/")
        logging.info(split_url)
        unique_no = match.group(1)
        logging.info("Unique no identified: %s", unique_no)
        # The URL ends with "/", so the last path segment sits at index -2.
        company_name = split_url[-2]
        logging.info("Company name identified: %s", company_name)
        reports_url = f"https://trendlyne.com/research-reports/stock/{unique_no}/{urllib.parse.quote(stock)}/{company_name}/"
    else:
        logging.info("unique number not identified in url continuing basic flow")
        reports_url = fetched_reports_url
    financials_url = reports_url.replace("research-reports/stock", "equity")
    url = reports_url.replace("research-reports/stock", "latest-news")
    logging.info("\nURL to fetch news links: %s", url)
    logging.info(f"\nURL to fetch health of financial insights:\n {financials_url}")
    logging.info(f"\nURL to fetch rating info:\n {reports_url}")

    # A browser-like User-Agent is sent with both page requests.
    request_headers = {'User-Agent': 'Mozilla/5.0'}
    html_content = requests.get(url, headers=request_headers).text
    html_content_financials = requests.get(financials_url, headers=request_headers).text

    soup = BeautifulSoup(html_content, "html.parser")
    # The news links are only logged; they are not part of the return value.
    news_links = [a_tag["href"] for a_tag in soup.find_all('a', class_='newslink')]
    logging.info("\nNews Links:\n%s", news_links)

    fin_soup = BeautifulSoup(html_content_financials, "html.parser")
    matches = re.findall(r'data-companyinsights="\[(.*?)\]"', str(fin_soup))
    # NOTE(review): the first literal was rendered as a bare double quote,
    # which is not valid Python inside a double-quoted string; restored as the
    # HTML entity "&quot;" -- confirm against the repository copy.
    company_insight = (
        matches[0]
        .replace("&quot;", "")
        .replace("\\u20b", "")
        .replace("parameter", "Metric")
        .replace("insight_color", "Trend")
        .replace("insight_text", "Insight")
    )
    all_news.append(company_insight)
    logging.info("All news insights obtained: %s", all_news)
    return all_news, reports_url
|
319 |
|
320 |
|
321 |
def get_google_news(ticker):
|
322 |
url = f"https://www.bing.com/news/search?q=Latest+News+on+{ticker}&form=YFNR&filters=sortbydate%3a%221%22"
|
323 |
+
logging.info(url)
|
324 |
sequence=[]
|
325 |
heading=[]
|
326 |
content = requests.get(url)
|
|
|
334 |
|
335 |
df = pd.DataFrame(heading, index=sequence, columns=["News-Headlines"])
|
336 |
df.sort_index()
|
337 |
+
logging.info(df.head(5))
|
338 |
return df["News-Headlines"].head(5).to_numpy()
|
339 |
|
340 |
|
|
|
374 |
def raw_news(raw_query, todays_news_func_call, ticker):
    """Gather every per-ticker data source for one stock.

    Args:
        raw_query: Accepted for interface compatibility; not read here.
        todays_news_func_call: Accepted for interface compatibility; not read
            here.
        ticker: Stock identifier handed to each fetch helper and URL-quoted
            into the SWOT widget link.

    Returns:
        A 5-tuple ``(news_data, ticker_stats_str, ticker_financials, reports,
        news_google)``:
        ``news_data`` is the result of ``scrape_webpage`` on the SWOT widget
        link, ``ticker_stats_str`` is the flattened text of the entries from
        ``get_the_ticker_stat``, ``ticker_financials`` and ``reports`` come
        from ``get_the_ticker_news``, and ``news_google`` from
        ``get_google_news``.
    """
    logging.info("Getting into get_the_ticker_stat()")
    ticker_stats = get_the_ticker_stat(ticker)

    logging.info("Getting into get_the_ticker_news()")
    ticker_financials, reports = get_the_ticker_news(ticker)

    # Values are passed as lazy %-style arguments: logging formats records as
    # `msg % args`, so an argument without a placeholder is a formatting error.
    logging.info("Brokers report link %s", reports)

    # Serialize each stat entry to JSON, then strip entity remnants, label
    # words and braces so only flat text is left.
    # NOTE(review): the first three literals were entity-decoded by the page
    # this was read from. '&quot;' is certain (a bare double quote is not
    # valid inside a double-quoted string); '&amp;' and '&#39;' are the most
    # likely originals -- confirm against the repository copy.
    ticker_stats_str = "".join(
        json.dumps(ticker_stat)
        .replace("&amp;", "'")
        .replace("&#39;", "'")
        .replace("&quot;", "'")
        .replace("Link", "")
        .replace("Heading", "")
        .replace("Body", "")
        .replace("Text", "")
        .replace("{", "")
        .replace("}", "")
        for ticker_stat in ticker_stats
    )

    logging.info("All ticker stat: %s", ticker_stats_str)

    logging.info("Getting into get_google_news()")
    news_google = get_google_news(ticker)

    logging.info("Getting into swot analysis for %s", ticker)
    swot_news_link = f'https://widgets.trendlyne.com/web-widget/swot-widget/Poppins/{urllib.parse.quote(ticker)}/'
    logging.info("SWOT Link: %s", swot_news_link)
    news_data = scrape_webpage(swot_news_link)
    logging.info("SWOT data successfully scraped")

    return news_data, ticker_stats_str, ticker_financials, reports, news_google
|
399 |
|
|
|
428 |
stream = client.text_generation(content, **generate_kwargs, stream=True, details=True, return_full_text=True)
|
429 |
for response in stream:
|
430 |
output += response.token.text
|
431 |
+
logging.info("Raw output from LLM: ", output)
|
432 |
+
logging.info("Datatype: ", type(output))
|
433 |
pattern = r'\{[^{}]*\}'
|
434 |
# Find all matches
|
435 |
match = re.search(pattern, output)
|
436 |
extracted_string = match.group(0)
|
437 |
+
logging.info("Extracted string from response: ", extracted_string)
|
438 |
output = json.loads(extracted_string)
|
439 |
return output
|
440 |
|
|
|
445 |
# temperature = 1e-2
|
446 |
# top_p = float(top_p)
|
447 |
global display_ticker
|
448 |
+
logging.info("Current Value of GLOBAL TICKER: ",display_ticker)
|
449 |
generate_kwargs = dict(temperature=0.001,max_new_tokens=1024,top_p=0.95,repetition_penalty=1.0,do_sample=True,seed=42)
|
450 |
today = datetime.date.today()
|
451 |
todays_date = today.strftime('%d%B%Y')
|
452 |
question = format_prompt(prompt, history)
|
453 |
+
logging.info("\n\nQuestion: ",question)
|
454 |
|
455 |
chat_completion_params = generate_function_call(question)
|
456 |
+
logging.info(chat_completion_params)
|
457 |
ticker = []
|
458 |
stock_names = chat_completion_params["stock_name"]
|
459 |
+
logging.info("Getting into get_the_ticker()")
|
460 |
ticker = get_the_ticker(stock_names)
|
461 |
+
logging.info("Final Ticker: ", ticker)
|
462 |
|
463 |
try:
|
464 |
if (chat_completion_params['todays_news_flag'] or chat_completion_params['generic_query']) and len(ticker)<1:
|
465 |
+
logging.info("Getting Latest News Headlines")
|
466 |
news_link.append(todays_news())
|
467 |
time.sleep(2)
|
468 |
elif chat_completion_params['todays_news_flag'] and len(ticker)>0:
|
469 |
for tick in chat_completion_params["stock_name"]:
|
470 |
news_googles.append(f"Latest News for {tick}\n\n {get_google_news(tick)}")
|
471 |
elif (chat_completion_params['follow_up_query'] and ticker_stats != []) or (display_ticker == ticker and ticker_stats != []):
|
472 |
+
logging.info("\n\nAssigned into a followup query\n\n")
|
473 |
chat_completion_params['follow_up_query'] = True
|
474 |
else:
|
475 |
+
logging.info("prompt & ticker: ", question, ticker )
|
476 |
+
logging.info("Getting into raw_news()")
|
477 |
for stock in ticker:
|
478 |
context_file, ticker_stat, ticker_financial, report, news_google = raw_news(raw_query=question, todays_news_func_call=chat_completion_params["todays_news_flag"], ticker=stock)
|
479 |
# Append each detail to its corresponding list
|
|
|
484 |
news_googles.append(news_google)
|
485 |
|
486 |
|
487 |
+
logging.info(f"Generating response for **{question}**")
|
488 |
display_ticker = ticker if ticker else [0]
|
489 |
env = Environment(loader=FileSystemLoader("templates/"), autoescape=True)
|
490 |
# env.globals['include'] = lambda filename: env.loader.get_source(env, filename)[0]
|
491 |
template = env.get_template("system_prompt.txt")
|
492 |
content = template.render(todays_date=todays_date,ticker_financials=ticker_financials ,response_type="Response-1",chat_completion_params=chat_completion_params,context_file=context_files, question=question,ticker=ticker, ticker_stats = ticker_stats, reports=reports, news_link=news_link, earnings = earning_link, news_googles=news_googles)
|
493 |
+
# logging.info(content)
|
494 |
output=""
|
495 |
try:
|
496 |
# Now start the streaming
|