"""Basic e-mail web crawler.

Prompts for a URL, downloads that page, reports how many quoted
http(s)/ftp(s) links its HTML contains and prints every e-mail address
found in the HTML.
"""
import re

# A double-quoted http://, https://, ftp:// or ftps:// URL. The scheme group
# is non-capturing so that findall() yields the URL strings themselves rather
# than (url, scheme) tuples.
LINK_PATTERN = re.compile(r'"((?:http|ftp)s?://.*?)"')

# Raw string: "\w" and "\." are not valid *string* escapes, so a plain literal
# triggers an invalid-escape warning on current Python versions.
# NOTE(review): the character classes also accept commas, exactly as the
# original pattern did -- confirm whether that is intended.
EMAIL_PATTERN = re.compile(r'([\w\.,]+@[\w\.,]+\.\w+)')

# requests applies no timeout by default; without one a stalled server would
# block the script forever.
REQUEST_TIMEOUT_SECONDS = 10


def fetch_html(url):
    """Download *url* and return the response body decoded as text.

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within REQUEST_TIMEOUT_SECONDS.
    """
    # Imported lazily so the pure regex helpers below remain importable
    # without the third-party dependency installed.
    import requests

    website = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    return website.text


def find_links(html):
    """Return every double-quoted http(s)/ftp(s) URL in *html*, in order."""
    return LINK_PATTERN.findall(html)


def find_emails(html):
    """Return every e-mail-like string in *html*, in order of appearance."""
    return EMAIL_PATTERN.findall(html)


def main():
    """Ask for a URL, then print the link count and each e-mail found."""
    # get url
    url = input('Enter a URL (include `http://`): ')

    # connect to the url and read html
    html = fetch_html(url)

    # use the compiled patterns to grab all the links and e-mail addresses
    links = find_links(html)
    emails = find_emails(html)

    # print the number of links in the list
    print("\nFound {} links".format(len(links)))

    # only the e-mail addresses are listed individually
    for email in emails:
        print(email)


if __name__ == "__main__":
    main()