Spaces:
Sleeping
Sleeping
| import requests | |
| from bs4 import BeautifulSoup | |
def fetch_web_content(url, timeout=10, max_chars=5000):
    """Download a web page and return its text content, truncated.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            an explicit timeout ``requests.get`` may block indefinitely on
            an unresponsive host.
        max_chars: Maximum number of characters of extracted text to
            return. Defaults to 5000 (the original code notes this as an
            API limitation). ``None`` disables truncation.

    Returns:
        The text extracted from the page, with each text fragment stripped
        and the fragments joined by single spaces, cut to at most
        ``max_chars`` characters. Returns ``None`` if the request fails or
        the server answers with an HTTP error status; the error is printed.
    """
    # Keep the try body limited to the calls that can actually raise a
    # RequestException: the network request and the status check.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the webpage: {e}")
        return None

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')
    # Extract the page text.
    # NOTE(review): whether <script>/<style> contents are excluded depends on
    # the installed BeautifulSoup version -- confirm if that matters here.
    page_text = soup.get_text(separator=' ', strip=True)
    return page_text[:max_chars]
if __name__ == "__main__":
    # Demo: fetch a sample article and show the beginning of its text.
    url = "https://en.wikipedia.org/wiki/Natural_language_processing"
    content = fetch_web_content(url)
    # fetch_web_content returns None when the request fails (it has already
    # printed the error); slicing None would raise TypeError, so exit with a
    # failure status instead.
    if content is None:
        raise SystemExit(1)
    print(content[:500])  # Print a sample of the content