python-scripts/08_basic_email_web_crawler.py
Michael Herman 160b68c08c Merge pull request #2 from devoxel/master
Add +x on all files for owner
2016-03-27 13:58:23 -06:00

22 lines
439 B
Python
Executable File

import requests
import re
# Seconds to wait on the server before giving up. Without a timeout,
# requests.get() may block indefinitely on an unresponsive host.
REQUEST_TIMEOUT_SECONDS = 30

# Double-quoted absolute http(s)/ftp(s) URLs, e.g. href="http://...".
# The scheme group is non-capturing so findall() yields plain URL strings
# instead of (url, scheme) tuples.
LINK_PATTERN = re.compile(r'"((?:http|ftp)s?://.*?)"')

# local-part@domain with at least one dot-separated label after the first.
# Commas are deliberately excluded (they separate addresses, they are not
# part of one); '+' and '-' are allowed so tagged addresses and hyphenated
# domains are matched in full.
EMAIL_PATTERN = re.compile(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+')


def extract_links(html):
    """Return every double-quoted http(s)/ftp(s) URL in *html*, in order.

    Relative links and unquoted URLs are not matched. Duplicates are kept.
    """
    return LINK_PATTERN.findall(html)


def extract_emails(html):
    """Return every email-like string in *html*, in order of appearance.

    Matching is purely textual (no validation that the address exists).
    Duplicates are kept.
    """
    return EMAIL_PATTERN.findall(html)


def main():
    """Prompt for a URL, fetch it, report the link count and list emails."""
    # get url
    url = input('Enter a URL (include `http://`): ')
    # connect to the url
    website = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    # read html
    html = website.text
    # grab all the links and email addresses
    links = extract_links(html)
    emails = extract_emails(html)
    # print the number of links found, then each email on its own line
    print("\nFound {} links".format(len(links)))
    for email in emails:
        print(email)


if __name__ == "__main__":
    main()