python-scripts/scripts/26_stock_scraper.py

33 lines
924 B
Python
Raw Normal View History

import requests
from lxml import html
from collections import defaultdict
def get_stocks(url):
# Make Request
page = requests.get(url)
# Parse/Scrape
tree = html.fromstring(page.text)
xpath = '//*[@id="mw-content-text"]/table[1]'
rows = tree.xpath(xpath)[0].findall("tr")
rows = [(row.getchildren()[0], row.getchildren()[3]) for row in rows[1:]]
rows = [(row[0].getchildren()[0].text, row[1].text) for row in rows]
industries = defaultdict(list)
for row in rows:
industries[row[1]].append(row[0])
return industries
def output_data(data_dict):
for industry in data_dict:
print('\n'+industry)
print('-'*len(industry))
for ticker in data_dict[industry]:
print(ticker)
if __name__ == '__main__':
url = 'http://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
scraped_data = get_stocks(url)
output_data(scraped_data)