Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -86,9 +86,9 @@ def todays_news():
|
|
86 |
|
87 |
def get_moneycontrol_news():
|
88 |
# Function to extract paragraphs and list items from a webpage
|
|
|
89 |
response = requests.get("https://www.moneycontrol.com/news/business/stocks/")
|
90 |
-
linkx =
|
91 |
-
|
92 |
# Check if the request was successful
|
93 |
if response.status_code == 200:
|
94 |
# Parse the HTML content of the page
|
@@ -105,10 +105,9 @@ def get_moneycontrol_news():
|
|
105 |
linkx.append(a_tag['href'])
|
106 |
filtered_links = list(set([link for link in linkx if link.endswith('.html')]))
|
107 |
else:
|
108 |
-
# If the request was not successful,
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
common_content = ""
|
113 |
|
114 |
# Iterate over each link
|
@@ -125,14 +124,22 @@ def get_moneycontrol_news():
|
|
125 |
if 'articleBody' in script.text:
|
126 |
split_article = script.text.split('articleBody', 1)[1]
|
127 |
split_author = split_article.split('author', 1)[0]
|
128 |
-
|
|
|
129 |
body = "Body -" + re.sub('[:\n-\";]|amp', ' ', split_author)
|
130 |
-
|
131 |
-
common_content = common_content + str({heading: body}) +
|
|
|
132 |
|
|
|
133 |
today = datetime.date.today()
|
134 |
formatted_date = today.strftime('%d%B%Y')
|
135 |
-
|
|
|
|
|
|
|
|
|
|
|
136 |
|
137 |
|
138 |
def job():
|
|
|
86 |
|
87 |
def get_moneycontrol_news():
|
88 |
# Function to extract paragraphs and list items from a webpage
|
89 |
+
# Send a GET request to the URL
|
90 |
response = requests.get("https://www.moneycontrol.com/news/business/stocks/")
|
91 |
+
linkx =[]
|
|
|
92 |
# Check if the request was successful
|
93 |
if response.status_code == 200:
|
94 |
# Parse the HTML content of the page
|
|
|
105 |
linkx.append(a_tag['href'])
|
106 |
filtered_links = list(set([link for link in linkx if link.endswith('.html')]))
|
107 |
else:
|
108 |
+
# If the request was not successful, print an error message
|
109 |
+
print("Failed to retrieve article links from the moneycontrol")
|
110 |
+
|
|
|
111 |
common_content = ""
|
112 |
|
113 |
# Iterate over each link
|
|
|
124 |
if 'articleBody' in script.text:
|
125 |
split_article = script.text.split('articleBody', 1)[1]
|
126 |
split_author = split_article.split('author', 1)[0]
|
127 |
+
# print(split_author)
|
128 |
+
heading = "Heading -" + link.split('/')[-1].replace("-"," ")
|
129 |
body = "Body -" + re.sub('[:\n-\";]|amp', ' ', split_author)
|
130 |
+
print(heading)
|
131 |
+
common_content = common_content + str({heading: body}) +","+"\n"
|
132 |
+
print(f"Article Scraped Successfully from {link}")
|
133 |
|
134 |
+
print("Creating context file...")
|
135 |
today = datetime.date.today()
|
136 |
formatted_date = today.strftime('%d%B%Y')
|
137 |
+
filename = f"templates/{formatted_date}" + '.txt'
|
138 |
+
with open(filename, 'w') as file:
|
139 |
+
file.write(f"The news given below was available on moneycontrol on {formatted_date}:\n")
|
140 |
+
file.write(common_content)
|
141 |
+
print(f"{filename} file generated")
|
142 |
+
return(common_content,filename)
|
143 |
|
144 |
|
145 |
def job():
|