new script
This commit is contained in:
parent
a6e299ff70
commit
f467a62416
18
02_find_all_links.py
Normal file
18
02_find_all_links.py
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
"""Print every absolute http(s)/ftp(s) link found in a web page.

The script prompts for a URL, downloads the page, and prints each
double-quoted ``http://``, ``https://``, ``ftp://`` or ``ftps://`` URL that
appears in the HTML, one per line, in document order (duplicates included).
"""
import contextlib
import re

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved urlopen(); keep the original module name in scope.
    import urllib.request as urllib2

try:
    _prompt = raw_input  # Python 2
except NameError:
    _prompt = input  # Python 3

# Non-capturing scheme group so findall() yields plain strings, not tuples.
_LINK_RE = re.compile(r'"((?:http|ftp)s?://.*?)"')


def find_links(html):
    """Return all double-quoted absolute links in *html*.

    Args:
        html: Page source as text, or as raw bytes (decoded as UTF-8 with
            undecodable bytes replaced).

    Returns:
        A list of URL strings in order of appearance; empty if none match.
    """
    # On Python 3 the response body is bytes; on Python 2 ``bytes is str``,
    # so this branch is skipped and the data is matched as-is.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode("utf-8", "replace")
    return _LINK_RE.findall(html)


def main():
    """Prompt for a URL, fetch it, and print every link found."""
    # get url
    url = _prompt('Enter a URL (include `http://`): ')

    # connect to the url and read html; closing() releases the connection
    # even if read() raises (Python 2 responses are not context managers).
    with contextlib.closing(urllib2.urlopen(url)) as website:
        html = website.read()

    # output links
    for link in find_links(html):
        print(link)


if __name__ == "__main__":
    main()
|
@ -1,3 +1,4 @@
|
|||||||
## Just another repo of Python scripts
|
## Just another repo of Python scripts
|
||||||
|
|
||||||
1. **remove_all_pyc.md**: remove all *.pyc* files from a git repo
|
1. **01_remove_all_pyc.md**: remove all *.pyc* files from a git repo
|
||||||
|
2. **02_find_all_links.py**: get all links from a webpage
|
Loading…
Reference in New Issue
Block a user