Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 42 additions & 19 deletions Google_News.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,60 @@

from bs4 import BeautifulSoup as soup

# --- Helper functions for error handling ---


def fetch_xml(url, timeout=10):
    """Fetch the raw response body of a URL, returning None on failure.

    @param url: address to download
    @param timeout: seconds to wait on a blocking network operation before
        giving up; without it a stalled server would hang the caller forever
    @return: the response body as bytes, or None if anything went wrong
        (the error is printed, not raised)
    """
    try:
        # NOTE(security): certificate verification is switched off here, so
        # HTTPS responses are not authenticated. Kept as-is to preserve the
        # existing behaviour; prefer ssl.create_default_context() unless
        # there is a known reason the default trust store cannot be used.
        context = ssl._create_unverified_context()
        with urlopen(url, timeout=timeout, context=context) as client:
            return client.read()
    except Exception as e:
        # Deliberate best-effort boundary: callers only check for None.
        print(f"Error fetching URL: {e}")
        return None


def get_text_or_default(tag, default="N/A"):
    """Return the text of a tag, or a default when there is none.

    @param tag: an object exposing a ``text`` attribute, or None when the
        element was not found
    @param default: value returned when the tag is None, has no ``text``
        attribute, or its ``text`` is None
    @return: the tag's text, or ``default``
    """
    # Compare against None explicitly: some element types evaluate as falsy
    # even though they are present, and a truthiness test would discard them.
    if tag is None:
        return default
    text = getattr(tag, "text", None)
    return default if text is None else text


# --- News printing function ---


def news(xml_news_url, counter):
    """Print the title, link and publication date of items in an xml feed.

    @param xml_news_url: url of the xml feed to download and parse
    @param counter: maximum number of news items to print
    """
    xml_page = fetch_xml(xml_news_url)
    if xml_page is None:
        # The download failed and fetch_xml has already printed the error.
        return

    soup_page = soup(xml_page, "xml")
    news_list = soup_page.find_all("item")

    # Clamp at zero so a negative counter prints nothing instead of slicing
    # relative to the end of the list.
    for item in news_list[: max(counter, 0)]:
        # Any of these child elements may be absent from an item; the helper
        # substitutes a placeholder instead of raising AttributeError.
        title = get_text_or_default(item.title)
        link = get_text_or_default(item.link)
        pub_date = get_text_or_default(item.pubDate)

        print(f"news title: {title}")
        print(f"news link: {link}")
        print(f"news pubDate: {pub_date}")
        print("+-" * 20, "\n\n")


# you can add google news 'xml' URL here for any country/category
news_url = "https://news.google.com/news/rss/?ned=us&gl=US&hl=en"
sports_url = "https://news.google.com/news/rss/headlines/section/topic/SPORTS.en_in/Sports?ned=in&hl=en-IN&gl=IN"

# Only hit the network when run as a script, so importing this module has no
# side effects beyond defining the names above.
if __name__ == "__main__":
    # now call news function with any of these url or BOTH
    news(news_url, 10)
    news(sports_url, 5)