blob: f9d200b6030427730ff6305db22dd8973356cdfc (
plain) (
tree)
|
|
#!/usr/bin/python3
import sys
import email
import quopri
import re
from bs4 import BeautifulSoup
import tempfile
import shutil
import subprocess
def extract_urls_html(html_string):
    """Return the href target of every anchor tag in an HTML document."""
    parsed = BeautifulSoup(html_string, 'html.parser')
    links = []
    # Only anchors that actually carry an href attribute are considered.
    for anchor in parsed.find_all('a', href=True):
        links.append(anchor['href'])
    return links
def extract_urls_text(input_string):
    """Return every http:// or https:// URL found in a plain-text string."""
    # Greedy \S+ takes everything up to the next whitespace, matching the
    # original behavior exactly (trailing punctuation is kept).
    url_pattern = re.compile(r'https?://\S+')
    return url_pattern.findall(input_string)
def decode_quoted_printable(encoded_text):
    """Decode a quoted-printable MIME payload into a UTF-8 string."""
    raw_bytes = quopri.decodestring(encoded_text)
    return raw_bytes.decode('utf-8')
def parse_mbox(mbox_text, contenttype):
    """Return the payload of the first MIME part matching *contenttype*.

    Quoted-printable parts are decoded to UTF-8 text; parts with any other
    transfer encoding are returned as stored.  Returns '' when no part
    matches, so callers can safely feed the result straight into the URL
    extractors instead of crashing on None.
    """
    message = email.message_from_string(mbox_text)
    for part in message.walk():
        if part.get_content_type() != contenttype:
            continue
        cte = part.get('Content-Transfer-Encoding', '').lower()
        if cte == 'quoted-printable':
            return quopri.decodestring(part.get_payload()).decode('utf-8')
        # NOTE(review): base64 parts come back still encoded here — confirm
        # whether they should be decoded too (part.get_payload(decode=True)).
        return part.get_payload()
    # Bug fix: previously fell through without a return when no part matched,
    # handing None to the regex/BeautifulSoup extractors in __main__.
    return ""
if __name__ == "__main__":
mbox_text = sys.stdin.read()
temp_dir = tempfile.mkdtemp("linkclicker")
for link in extract_urls_text(parse_mbox(mbox_text, "text/plain")) + extract_urls_html(parse_mbox(mbox_text, "text/html")):
cmd = ["timeout", "30s", "librewolf", "--headless", "--profile", temp_dir, "--no-remote", "--new-instance", "--screenshot", temp_dir + "/screenshot.png", "--", link]
result = subprocess.run(cmd, capture_output=True, text=True)
print("cmd: " + str(cmd) + "\nstdout: " + result.stdout + "\nstderr: " + result.stderr + "\nreturncode: " + str(result.returncode));
shutil.rmtree(temp_dir)
|