"""Print every absolute link found on a web page.

Script ``02_find_all_links.py`` ("get all links from a webpage"): prompts
for a URL, downloads the page, and prints each double-quoted ``http(s)://``
or ``ftp(s)://`` URL found in the HTML, one per line and in document order.

Runs on both Python 2 and Python 3.
"""

# Recovered from a flattened git patch. The same patch also:
#   * renames remove_all_pyc.md to 01_remove_all_pyc.md (no content change);
#   * updates readme.md so that it lists
#       1. 01_remove_all_pyc.md: remove all .pyc files from a git repo
#       2. 02_find_all_links.py: get all links from a webpage

import re
import sys
from contextlib import closing

try:  # Python 2
    from urllib2 import urlopen
except ImportError:  # Python 3
    from urllib.request import urlopen

try:  # Python 2
    read_input = raw_input
except NameError:  # Python 3
    read_input = input

# A double-quoted absolute URL. The scheme alternation is non-capturing so
# that ``findall`` returns the URL itself rather than a (url, scheme) tuple.
# ``.`` does not match newlines, so a URL never spans more than one line.
LINK_PATTERN = re.compile(r'"((?:http|ftp)s?://.*?)"')


def find_links(html):
    """Return all double-quoted http(s)/ftp(s) URLs in *html*.

    Args:
        html: Page source as text.

    Returns:
        A list of URL strings (without the surrounding quotes) in the order
        they appear. Duplicates are kept. Empty if nothing matches.
    """
    return LINK_PATTERN.findall(html)


def fetch_html(url):
    """Download *url* and return the response body as text.

    The connection is always closed, even if reading fails. On Python 3 the
    body arrives as bytes and is decoded with the charset declared in the
    response headers, falling back to UTF-8; undecodable bytes are replaced
    rather than raising. On Python 2 the body is returned unchanged.

    Args:
        url: Absolute URL including its scheme, e.g. ``http://example.com``.

    Returns:
        The page source as a string.

    Raises:
        ValueError: If the URL has no recognisable scheme.
        IOError: If the request fails (``URLError`` is a subclass).
    """
    with closing(urlopen(url)) as response:
        raw = response.read()
        if isinstance(raw, str):
            # Python 2: ``read()`` already returns ``str``.
            return raw
        charset = response.headers.get_content_charset() or 'utf-8'
    try:
        return raw.decode(charset, 'replace')
    except LookupError:
        # The server declared a charset Python does not know.
        return raw.decode('utf-8', 'replace')


def main():
    """Prompt for a URL and print every link found on that page."""
    url = read_input('Enter a URL (include `http://`): ').strip()
    try:
        html = fetch_html(url)
    except (ValueError, IOError) as err:
        # Exit with a short message and a non-zero status, not a traceback.
        sys.exit('Could not fetch %r: %s' % (url, err))
    for link in find_links(html):
        print(link)


if __name__ == '__main__':
    main()