# python-scripts/02_find_all_links.py
# (original listing: 19 lines, 314 B, Python)

import requests
# 2014-04-21 15:55:09 +02:00 (timestamp residue from the web view; not part of the script)
import re
# Matches any double-quoted http(s)/ftp(s) URL, e.g. href="http://example.com".
# The scheme group is non-capturing so that re.findall returns plain strings
# instead of (url, scheme) tuples.
LINK_PATTERN = re.compile(r'"((?:http|ftp)s?://.*?)"')

# Seconds to wait for the server; without a timeout requests.get can block
# forever on an unresponsive host.
REQUEST_TIMEOUT = 10


def find_links(html):
    """Return all double-quoted http(s)/ftp(s) URLs found in *html*.

    The URLs are returned as a list of strings, in the order they appear in
    the text and without the surrounding quotes. Duplicates are kept. An
    empty list is returned when nothing matches.
    """
    return LINK_PATTERN.findall(html)


def main():
    """Prompt for a URL, download the page and print every link found in it.

    Network errors raised by ``requests.get`` (including a timeout after
    ``REQUEST_TIMEOUT`` seconds) are not caught and propagate to the caller.
    """
    # get url
    url = input('Enter a URL (include `http://`): ')
    # connect to the url
    website = requests.get(url, timeout=REQUEST_TIMEOUT)
    # read html
    html = website.text
    # output links, one per line
    for link in find_links(html):
        print(link)


if __name__ == "__main__":
    main()