import asyncio
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
async def get_link_preview(url, *, timeout=10):
    """Fetch a web page and extract the metadata needed for a link preview.

    A URL without a scheme is assumed to be ``https``. Open Graph tags are
    preferred, then Twitter card tags, then plain HTML fallbacks.

    Args:
        url: Address of the page to preview, with or without a scheme.
        timeout: Seconds to wait for the server before giving up; passed
            through to ``requests.get``.

    Returns:
        A dict with the keys ``title``, ``description``, ``img``, ``url`` and
        ``sitename``. Values that cannot be determined are ``None``. An empty
        dict is returned when the server does not answer with status 200.

    Raises:
        ValueError: If the URL scheme is neither ``http`` nor ``https``.
        requests.RequestException: If the HTTP request fails or times out.
    """
    # Ensure the URL has a scheme, defaulting to 'https' when it is missing.
    parsed_url = urlparse(url)
    if not parsed_url.scheme:
        url = f'https://{url}'
        parsed_url = urlparse(url)

    # Validate before any network access so bad input fails fast.
    if parsed_url.scheme not in ('http', 'https'):
        raise ValueError("Only 'http' and 'https' protocols are allowed.")

    # requests is a blocking library: run it in the default executor so the
    # event loop stays responsive, and bound the wait with a timeout.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(
        None, lambda: requests.get(url, timeout=timeout)
    )
    if response.status_code != 200:
        return {}

    soup = BeautifulSoup(response.text, 'html.parser')
    return {
        "title": _first_meta_content(
            soup,
            [{'property': 'og:title'}, {'name': 'twitter:title'}],
        ) or _title_text(soup),
        "description": _first_meta_content(
            soup,
            [
                {'property': 'og:description'},
                {'name': 'twitter:description'},
                {'name': 'description'},
            ],
        ),
        "img": _preview_image(soup),
        "url": _first_meta_content(soup, [{'property': 'og:url'}]) or url,
        "sitename": parsed_url.hostname,
    }


def _first_meta_content(soup, selectors):
    """Return the first non-empty ``content`` of the <meta> tags matched in order."""
    for attrs in selectors:
        tag = soup.find('meta', attrs=attrs)
        content = tag.get('content') if tag else None
        # Skip tags that exist but carry no content so later fallbacks apply.
        if content:
            return content
    return None


def _title_text(soup):
    """Return the text of the page's <title> as a plain str, or None."""
    title_tag = soup.title
    if title_tag is None or title_tag.string is None:
        return None
    # Convert to a plain str so the result does not keep the parse tree alive.
    return str(title_tag.string)


def _preview_image(soup):
    """Return the best image URL for the preview, or None if the page has none."""
    image = _first_meta_content(
        soup,
        [{'property': 'og:image'}, {'name': 'twitter:image'}],
    )
    if image:
        return image

    # <link rel="image_src"> stores its URL in href, not content.
    link_tag = soup.find('link', rel='image_src')
    if link_tag and link_tag.get('href'):
        return link_tag['href']

    # Last resort: the first <img> that actually has a src attribute.
    img_tag = soup.find('img', src=True)
    if img_tag and img_tag['src']:
        return img_tag['src']
    return None