# Fetch a web page and report the links and e-mail addresses found in it.

import requests
import re
# Links are double-quoted http(s)/ftp(s) URLs.  The scheme group is
# non-capturing so findall() yields plain URL strings rather than
# (url, scheme) tuples.
LINK_PATTERN = re.compile(r'"((?:http|ftp)s?://.*?)"')

# Raw string so that \w and \. reach the regex engine instead of being
# treated as (invalid) string escape sequences.
EMAIL_PATTERN = re.compile(r'([\w\.,]+@[\w\.,]+\.\w+)')

# Seconds to wait for the server; without a timeout requests.get() can
# block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 10


def find_links(html):
    """Return every double-quoted http(s)/ftp(s) URL found in *html*.

    The surrounding quotes are not included in the returned strings.
    """
    return LINK_PATTERN.findall(html)


def find_emails(html):
    """Return every e-mail-like string found in *html*."""
    return EMAIL_PATTERN.findall(html)


def main():
    """Ask for a URL, download it, and report its links and e-mails.

    Prints the number of links found, followed by each e-mail address on
    its own line.
    """
    # get url
    url = input('Enter a URL (include `http://`): ')

    # connect to the url
    website = requests.get(url, timeout=REQUEST_TIMEOUT)

    # read html
    html = website.text

    # use the compiled patterns to grab all the links and e-mail addresses
    links = find_links(html)
    emails = find_emails(html)

    # prints the number of links in the list
    print("\nFound {} links".format(len(links)))

    for email in emails:
        print(email)


if __name__ == "__main__":
    main()