Spaces:

sharsh02
/

StockX-Mixtral8x7B

Running

App Files Files

sharsh02 committed on Aug 2

Commit

231b924

•

1 Parent(s): b55d55e

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -10

app.py CHANGED Viewed

@@ -86,9 +86,9 @@ def todays_news():
 def get_moneycontrol_news():
     # Function to extract paragraphs and list items from a webpage
     response = requests.get("https://www.moneycontrol.com/news/business/stocks/")
-    linkx = []
     # Check if the request was successful
     if response.status_code == 200:
         # Parse the HTML content of the page
@@ -105,10 +105,9 @@ def get_moneycontrol_news():
                     linkx.append(a_tag['href'])
         filtered_links = list(set([link for link in linkx if link.endswith('.html')]))
     else:
-        # If the request was not successful, logging.info an error message
-        logging.info("Failed to retrieve article links from the moneycontrol")
-        return
     common_content = ""
     # Iterate over each link
@@ -125,14 +124,22 @@ def get_moneycontrol_news():
                 if 'articleBody' in script.text:
                     split_article = script.text.split('articleBody', 1)[1]
                     split_author = split_article.split('author', 1)[0]
-                    heading = "Heading -" + link.split('/')[-1].replace("-", " ")
                     body = "Body -" + re.sub('[:\n-\";]|amp', ' ', split_author)
-                    logging.info(heading)
-                    common_content = common_content + str({heading: body}) + "," + "\n"
     today = datetime.date.today()
     formatted_date = today.strftime('%d%B%Y')
-    return common_content
 def job():

 def get_moneycontrol_news():
     # Function to extract paragraphs and list items from a webpage
+    # Send a GET request to the URL
     response = requests.get("https://www.moneycontrol.com/news/business/stocks/")
+    linkx =[]
     # Check if the request was successful
     if response.status_code == 200:
         # Parse the HTML content of the page
                     linkx.append(a_tag['href'])
         filtered_links = list(set([link for link in linkx if link.endswith('.html')]))
     else:
+        # If the request was not successful, print an error message
+        print("Failed to retrieve article links from the moneycontrol")
     common_content = ""
     # Iterate over each link
                 if 'articleBody' in script.text:
                     split_article = script.text.split('articleBody', 1)[1]
                     split_author = split_article.split('author', 1)[0]
+                    # print(split_author)
+                    heading = "Heading -" + link.split('/')[-1].replace("-"," ")
                     body = "Body -" + re.sub('[:\n-\";]|amp', ' ', split_author)
+                    print(heading)
+                    common_content = common_content + str({heading: body}) +","+"\n"
+                    print(f"Article Scraped Successfully from {link}")
+    print("Creating context file...")
     today = datetime.date.today()
     formatted_date = today.strftime('%d%B%Y')
+    filename = f"templates/{formatted_date}" + '.txt'
+    with open(filename, 'w') as file:
+        file.write(f"The news given below was available on moneycontrol on {formatted_date}:\n")
+        file.write(common_content)
+        print(f"{filename} file generated")
+    return(common_content,filename)
 def job():