sharsh02 committed on
Commit
231b924
1 Parent(s): b55d55e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -10
app.py CHANGED
@@ -86,9 +86,9 @@ def todays_news():
86
 
87
  def get_moneycontrol_news():
88
  # Function to extract paragraphs and list items from a webpage
 
89
  response = requests.get("https://www.moneycontrol.com/news/business/stocks/")
90
- linkx = []
91
-
92
  # Check if the request was successful
93
  if response.status_code == 200:
94
  # Parse the HTML content of the page
@@ -105,10 +105,9 @@ def get_moneycontrol_news():
105
  linkx.append(a_tag['href'])
106
  filtered_links = list(set([link for link in linkx if link.endswith('.html')]))
107
  else:
108
- # If the request was not successful, logging.info an error message
109
- logging.info("Failed to retrieve article links from the moneycontrol")
110
- return
111
-
112
  common_content = ""
113
 
114
  # Iterate over each link
@@ -125,14 +124,22 @@ def get_moneycontrol_news():
125
  if 'articleBody' in script.text:
126
  split_article = script.text.split('articleBody', 1)[1]
127
  split_author = split_article.split('author', 1)[0]
128
- heading = "Heading -" + link.split('/')[-1].replace("-", " ")
 
129
  body = "Body -" + re.sub('[:\n-\";]|amp', ' ', split_author)
130
- logging.info(heading)
131
- common_content = common_content + str({heading: body}) + "," + "\n"
 
132
 
 
133
  today = datetime.date.today()
134
  formatted_date = today.strftime('%d%B%Y')
135
- return common_content
 
 
 
 
 
136
 
137
 
138
  def job():
 
86
 
87
  def get_moneycontrol_news():
88
  # Function to extract paragraphs and list items from a webpage
89
+ # Send a GET request to the URL
90
  response = requests.get("https://www.moneycontrol.com/news/business/stocks/")
91
+ linkx =[]
 
92
  # Check if the request was successful
93
  if response.status_code == 200:
94
  # Parse the HTML content of the page
 
105
  linkx.append(a_tag['href'])
106
  filtered_links = list(set([link for link in linkx if link.endswith('.html')]))
107
  else:
108
+ # If the request was not successful, print an error message
109
+ print("Failed to retrieve article links from the moneycontrol")
110
+
 
111
  common_content = ""
112
 
113
  # Iterate over each link
 
124
  if 'articleBody' in script.text:
125
  split_article = script.text.split('articleBody', 1)[1]
126
  split_author = split_article.split('author', 1)[0]
127
+ # print(split_author)
128
+ heading = "Heading -" + link.split('/')[-1].replace("-"," ")
129
  body = "Body -" + re.sub('[:\n-\";]|amp', ' ', split_author)
130
+ print(heading)
131
+ common_content = common_content + str({heading: body}) +","+"\n"
132
+ print(f"Article Scraped Successfully from {link}")
133
 
134
+ print("Creating context file...")
135
  today = datetime.date.today()
136
  formatted_date = today.strftime('%d%B%Y')
137
+ filename = f"templates/{formatted_date}" + '.txt'
138
+ with open(filename, 'w') as file:
139
+ file.write(f"The news given below was available on moneycontrol on {formatted_date}:\n")
140
+ file.write(common_content)
141
+ print(f"{filename} file generated")
142
+ return(common_content,filename)
143
 
144
 
145
  def job():