diff --git a/.gitignore b/.gitignore
index 449fe6d..be6b6be 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 .pyc
 .DS_Store
-_tmp
\ No newline at end of file
+_tmp
+env
+__pycache__
\ No newline at end of file
diff --git a/02_find_all_links.py b/02_find_all_links.py
index 76a7c99..37dff11 100644
--- a/02_find_all_links.py
+++ b/02_find_all_links.py
@@ -1,18 +1,18 @@
-import urllib2
+import requests
 import re
 
 # get url
-url =raw_input('Enter a URL (include `http://`): ')
+url = input('Enter a URL (include `http://`): ')
 
 # connect to the url
-website = urllib2.urlopen(url)
+website = requests.get(url)
 
 # read html
-html = website.read()
+html = website.text
 
 # use re.findall to grab all the links
 links = re.findall('"((http|ftp)s?://.*?)"', html)
 
 # output links
 for link in links:
-    print link[0]
\ No newline at end of file
+    print(link[0])
diff --git a/03_simple_twitter_manager.py b/03_simple_twitter_manager.py
index b7e4a51..e39a20b 100644
--- a/03_simple_twitter_manager.py
+++ b/03_simple_twitter_manager.py
@@ -1,25 +1,28 @@
 import twitter
- 
- 
+
+
 TWITTER_CONSUMER_KEY = 'XXX'
 TWITTER_CONSUMER_SECRET = 'XXX'
 TWITTER_ACCESS_TOKEN_KEY = 'XXX'
 TWITTER_ACCESS_TOKEN_SECRET = 'XXX'
- 
+
 twitter_api = twitter.Api(
     consumer_key=TWITTER_CONSUMER_KEY,
     consumer_secret=TWITTER_CONSUMER_SECRET,
     access_token_key=TWITTER_ACCESS_TOKEN_KEY,
     access_token_secret=TWITTER_ACCESS_TOKEN_SECRET
 )
- 
+
 if __name__ == '__main__':
     follower_ids = twitter_api.GetFollowerIDs()
     following_ids = twitter_api.GetFriendIDs()
-    zombie_follows = [following_id for following_id in following_ids if following_id not in follower_ids]
- 
-    confirm = raw_input("Are you sure you want to unfollow %s tweeps [y|n]? " % (len(zombie_follows)))
+    zombie_follows = [following_id for following_id in
+                      following_ids if following_id not in follower_ids]
+
+    confirm = input(
+        "Are you sure you want to unfollow {0} tweeps [y|n]? ".format(
+            (len(zombie_follows))))
     if confirm.lower() == 'y':
         for id in zombie_follows:
             user = twitter_api.DestroyFriendship(user_id=id)
-            print "Unfollowed %s" % (user.screen_name)
\ No newline at end of file
+            print("Unfollowed {0}".format(user.screen_name))
diff --git a/04_rename_with_slice.py b/04_rename_with_slice.py
index 2ff84f7..dd849ef 100644
--- a/04_rename_with_slice.py
+++ b/04_rename_with_slice.py
@@ -8,7 +8,7 @@ for file in glob.glob("*.json"):
     new_file_name = file_name[:-6] + extension
     try:
         os.rename(file, new_file_name)
-    except OSError, e:
-        print e
+    except OSError as e:
+        print(e)
     else:
-        print "Renamed {} to {}".format(file, new_file_name)
+        print("Renamed {} to {}".format(file, new_file_name))
diff --git a/05_load_json_without_dupes.py b/05_load_json_without_dupes.py
index 5b767a2..2cbe318 100644
--- a/05_load_json_without_dupes.py
+++ b/05_load_json_without_dupes.py
@@ -1,11 +1,9 @@
-import json
-
 def dict_raise_on_duplicates(ordered_pairs):
     """reject duplicate keys"""
     my_dict = dict()
     for key, values in ordered_pairs:
         if key in my_dict:
-           raise ValueError("Duplicate key: {}".format(key,))
+            raise ValueError("Duplicate key: {}".format(key,))
         else:
-           my_dict[key] = values
-    return my_dict
\ No newline at end of file
+            my_dict[key] = values
+    return my_dict
diff --git a/06_execution_time.py b/06_execution_time.py
index 95d5ca4..9614bbd 100644
--- a/06_execution_time.py
+++ b/06_execution_time.py
@@ -13,6 +13,7 @@ For example:
 
 
 import time
+import random
 
 
 class ExecutionTime:
@@ -25,9 +26,9 @@ class ExecutionTime:
 
 
 # ---- run code ---- #
 
-import random
 timer = ExecutionTime()
 sample_list = list()
-my_list = [random.randint(1, 888898) for num in xrange(1, 1000000) if num % 2 == 0]
-print 'Finished in {} seconds.'.format(timer.duration())
\ No newline at end of file
+my_list = [random.randint(1, 888898) for num in
+           range(1, 1000000) if num % 2 == 0]
+print('Finished in {} seconds.'.format(timer.duration()))
diff --git a/07_benchmark_permissions_loading_django.py b/07_benchmark_permissions_loading_django.py
index 0e2e06b..e1e6900 100644
--- a/07_benchmark_permissions_loading_django.py
+++ b/07_benchmark_permissions_loading_django.py
@@ -14,8 +14,8 @@ def timeit(method):
         te = time.time()
         all_times.append(te - ts)
 
-        print all_times
-        print numpy.mean(all_times)
+        print(all_times)
+        print(numpy.mean(all_times))
 
         return result
     return timed
@@ -39,4 +39,4 @@ if __name__ == "__main__":
     while n < 10:
         create_new_db()
         load_new_perms()
-        n += 1
\ No newline at end of file
+        n += 1
diff --git a/08_basic_email_web_crawler.py b/08_basic_email_web_crawler.py
index faca75f..9c6c58f 100644
--- a/08_basic_email_web_crawler.py
+++ b/08_basic_email_web_crawler.py
@@ -1,6 +1,9 @@
 import requests
 import re
-import urlparse
+try:
+    from urllib.parse import urljoin
+except ImportError:
+    from urlparse import urljoin
 
 # regex
 email_re = re.compile(r'([\w\.,]+@[\w\.,]+\.\w+)')
@@ -20,13 +23,13 @@ def crawl(url):
     # Find links
     links = link_re.findall(req.text)
 
-    print "\nFound {} links".format(len(links))
+    print("\nFound {} links".format(len(links)))
 
     # Search links for emails
     for link in links:
 
         # Get an absolute URL for a link
-        link = urlparse.urljoin(url, link)
+        link = urljoin(url, link)
 
         # Find all emails on current page
         result.update(email_re.findall(req.text))
@@ -36,7 +39,7 @@ def crawl(url):
 if __name__ == '__main__':
     emails = crawl('http://www.realpython.com')
 
-    print "\nScrapped e-mail addresses:"
+    print("\nScraped e-mail addresses:")
     for email in emails:
-        print email
-    print "\n"
+        print(email)
+    print("\n")
diff --git a/09_basic_link_web_crawler.py b/09_basic_link_web_crawler.py
index 4531ac3..87e2fab 100644
--- a/09_basic_link_web_crawler.py
+++ b/09_basic_link_web_crawler.py
@@ -1,6 +1,9 @@
 import requests
 import re
-import urlparse
+try:
+    from urllib.parse import urljoin
+except ImportError:
+    from urlparse import urljoin
 
 # regex
 link_re = re.compile(r'href="(.*?)"')
@@ -17,17 +20,15 @@ def crawl(url):
     # Find links
     links = link_re.findall(req.text)
 
-    print "\nFound {} links".format(len(links))
+    print("\nFound {} links".format(len(links)))
 
     # Search links for emails
     for link in links:
 
         # Get an absolute URL for a link
-        link = urlparse.urljoin(url, link)
+        link = urljoin(url, link)
 
-        print link
-
+        print(link)
 
 if __name__ == '__main__':
     crawl('http://www.realpython.com')
-
diff --git a/10_find_files_recursively.py b/10_find_files_recursively.py
index 7251b10..91cd73c 100644
--- a/10_find_files_recursively.py
+++ b/10_find_files_recursively.py
@@ -2,7 +2,7 @@ import fnmatch
 import os
 
 # constants
-PATH = '/../../../..'
+PATH = './'
 PATTERN = '*.py'
 
 
@@ -14,18 +14,18 @@ def get_file_names(filepath, pattern):
                 # matches.append(os.path.join(root, filename))  # full path
                 matches.append(os.path.join(filename))  # just file name
         if matches:
-            print "Found {} files:".format(len(matches))
+            print("Found {} files:".format(len(matches)))
             output_files(matches)
         else:
-            print "No files found."
+            print("No files found.")
     else:
-        print "Sorry that path does not exist. Try again."
+        print("Sorry that path does not exist. Try again.")
 
 
 def output_files(list_of_files):
     for filename in list_of_files:
-        print filename
+        print(filename)
 
 
 if __name__ == '__main__':
-    all_files = get_file_names(PATH, PATTERN)
\ No newline at end of file
+    all_files = get_file_names(PATH, PATTERN)
diff --git a/11_optimize_images_with_wand.py b/11_optimize_images_with_wand.py
index c3449fd..a95b8b0 100644
--- a/11_optimize_images_with_wand.py
+++ b/11_optimize_images_with_wand.py
@@ -1,9 +1,9 @@
 import fnmatch
 import os
 
-# sudo pip install Wand
+# pip install Wand
 from wand.image import Image
-# sudo pip install http://pypi.python.org/packages/source/h/hurry.filesize/hurry.filesize-0.9.tar.gz
+# pip install http://pypi.python.org/packages/source/h/hurry.filesize/hurry.filesize-0.9.tar.gz
 from hurry.filesize import size
 
 
@@ -19,12 +19,13 @@ def get_image_file_names(filepath, pattern):
             for filename in fnmatch.filter(filenames, pattern):
                 matches.append(os.path.join(root, filename))  # full path
         if matches:
-            print "Found {} files, with a total file size of {}.".format(len(matches), get_total_size(matches))
+            print("Found {} files, with a total file size of {}.".format(
+                len(matches), get_total_size(matches)))
             return matches
         else:
-            print "No files found."
+            print("No files found.")
     else:
-        print "Sorry that path does not exist. Try again."
+        print("Sorry that path does not exist. Try again.")
 
 
 def get_total_size(list_of_image_names):
@@ -35,7 +36,7 @@ def get_total_size(list_of_image_names):
 
 
 def resize_images(list_of_image_names):
-    print "Optimizing ... "
+    print("Optimizing ... ")
     for index, image_name in enumerate(list_of_image_names):
         with open(image_name) as f:
             image_binary = f.read()
@@ -43,7 +44,7 @@ def resize_images(list_of_image_names):
         if img.height >= 600:
             img.transform(resize='x600')
             img.save(filename=image_name)
-    print "Optimization complete."
+    print("Optimization complete.")
 
 
 if __name__ == '__main__':
diff --git a/12_csv_split.py b/12_csv_split.py
index 65c698c..43ed1ee 100644
--- a/12_csv_split.py
+++ b/12_csv_split.py
@@ -117,10 +117,10 @@ def parse_file(arguments):
             writer = writer.writerows(chunk)
 
             # Output info
-            print ""
-            print "Chunk # {}:".format(current_chunk)
-            print "Filepath: {}".format(current_output)
-            print "# of rows: {}".format(len(chunk))
+            print("")
+            print("Chunk # {}:".format(current_chunk))
+            print("Filepath: {}".format(current_output))
+            print("# of rows: {}".format(len(chunk)))
 
             # Create new chunk
             current_chunk += 1
diff --git a/13_random_name_generator.py b/13_random_name_generator.py
index 0719eec..6f0a00a 100644
--- a/13_random_name_generator.py
+++ b/13_random_name_generator.py
@@ -10,7 +10,7 @@ def random_name_generator(first, second, x):
     - number of random names
     """
     names = []
-    for i in xrange(0, int(x)):
+    for i in range(0, int(x)):
         random_first = randint(0, len(first)-1)
         random_last = randint(0, len(second)-1)
         names.append("{0} {1}".format(
@@ -23,4 +23,4 @@ def random_name_generator(first, second, x):
 first_names = ["Drew", "Mike", "Landon", "Jeremy", "Tyler", "Tom", "Avery"]
 last_names = ["Smith", "Jones", "Brighton", "Taylor"]
 names = random_name_generator(first_names, last_names, 5)
-print '\n'.join(names)
+print('\n'.join(names))
diff --git a/15_check_my_environment.py b/15_check_my_environment.py
index 11017c4..62e0b8d 100644
--- a/15_check_my_environment.py
+++ b/15_check_my_environment.py
@@ -11,7 +11,7 @@ class Main:
         pass
 
     def process(self):
-        print "ok"
+        print("ok")
 
 if __name__ == "__main__":
     m = Main(some_script.CONFIGFILE)
@@ -39,7 +39,7 @@ CONFIGFILE = get_config_file()
 if CONFIGFILE is None:
     sys.exit("Configuration error! Unknown environment set. \
              Edit config.py and set appropriate environment")
-print "Config file: {}".format(CONFIGFILE)
+print("Config file: {}".format(CONFIGFILE))
 if not os.path.exists(CONFIGFILE):
     sys.exit("Configuration error! Config file does not exist")
-print "Config ok ...."
+print("Config ok ....")
diff --git a/18_zipper.py b/18_zipper.py
index a350a70..43c956d 100755
--- a/18_zipper.py
+++ b/18_zipper.py
@@ -3,7 +3,7 @@ from datetime import datetime
 from zipfile import ZipFile
 
 
-#set file name and time of creation
+# set file name and time of creation
 today = datetime.now()
 file_name = 'zipper_' + today.strftime('%Y.%m.%dh%H%M') + '.zip'
 dir_name = 'tmp/'  # update path
diff --git a/20_restore_file_from_git.py b/20_restore_file_from_git.py
index f692d9d..b1f581b 100644
--- a/20_restore_file_from_git.py
+++ b/20_restore_file_from_git.py
@@ -1,9 +1,9 @@
 from subprocess import check_output, call
 
-file_name = str(raw_input('Enter the file name: '))
+file_name = str(input('Enter the file name: '))
 
 commit = check_output(["git", "rev-list", "-n", "1", "HEAD", "--", file_name])
 
-print str(commit).rstrip()
+print(str(commit).rstrip())
 
 call(["git", "checkout", str(commit).rstrip()+"~1", file_name])
diff --git a/22_git_tag.py b/22_git_tag.py
index 283f495..4849c07 100644
--- a/22_git_tag.py
+++ b/22_git_tag.py
@@ -10,5 +10,5 @@ if len(sys.argv) == 3:
     subprocess.call(command, shell=True)
     subprocess.call('git push --tags', shell=True)
 else:
-    print 'usage: tag.py TAG_NAME COMMIT'
+    print('usage: tag.py TAG_NAME COMMIT')
     sys.exit(1)
diff --git a/24_sql2csv.py b/24_sql2csv.py
index 19d0c42..4e8f484 100644
--- a/24_sql2csv.py
+++ b/24_sql2csv.py
@@ -3,7 +3,7 @@ import csv
 import sqlite3
 
 if len(sys.argv) < 3:
-    print "Use: {0} DATABASE_NAME TABLE_NAME".format(sys.argv[0])
+    print("Use: {0} DATABASE_NAME TABLE_NAME".format(sys.argv[0]))
     exit()
 
 conn = sqlite3.connect(sys.argv[1])
diff --git a/25_ip2geolocation.py b/25_ip2geolocation.py
index f312989..f593676 100644
--- a/25_ip2geolocation.py
+++ b/25_ip2geolocation.py
@@ -9,7 +9,7 @@ def get_addresses(filename):
     row info from the csv file.
     """
     all_addresses = []
-    with open(filename, 'rb') as f:
+    with open(filename, 'rt') as f:
         reader = csv.reader(f)
         for row in reader:
             all_addresses.append(row)
@@ -29,7 +29,7 @@ def get_geolocation(all_the_ip_address):
     header_row.extend(['Country', 'City'])
     # get geolocation
     for line in all_the_ip_address:
-        print "Grabbing geo info for row # {0}".format(counter)
+        print("Grabbing geo info for row # {0}".format(counter))
         r = requests.get('https://freegeoip.net/json/{0}'.format(line[0]))
         line.extend([str(r.json()['country_name']), str(r.json()['city'])])
         updated_addresses.append(line)
@@ -43,10 +43,15 @@ def create_csv(updated_address_list):
     Given the updated lists of lists from `get_geolocation()`,
     this function creates a new CSV.
     """
-    with open('output.csv', 'wb') as f:
+    import sys
+    if sys.version_info >= (3, 0, 0):
+        f = open('output.csv', 'w', newline='')
+    else:
+        f = open('output.csv', 'wb')
+    with f:
         writer = csv.writer(f)
         writer.writerows(updated_address_list)
-    print "All done!"
+    print("All done!")
 
 
 if __name__ == '__main__':
diff --git a/26_stock_scraper.py b/26_stock_scraper.py
new file mode 100644
index 0000000..3e69cc2
--- /dev/null
+++ b/26_stock_scraper.py
@@ -0,0 +1,32 @@
+import requests
+from lxml import html
+from collections import defaultdict
+
+
+def get_stocks(url):
+    # Make Request
+    page = requests.get(url)
+    # Parse/Scrape
+    tree = html.fromstring(page.text)
+    xpath = '//*[@id="mw-content-text"]/table[1]'
+    rows = tree.xpath(xpath)[0].findall("tr")
+    rows = [(row.getchildren()[0], row.getchildren()[3]) for row in rows[1:]]
+    rows = [(row[0].getchildren()[0].text, row[1].text) for row in rows]
+    industries = defaultdict(list)
+    for row in rows:
+        industries[row[1]].append(row[0])
+    return industries
+
+
+def output_data(data_dict):
+    for industry in data_dict:
+        print('\n'+industry)
+        print('-'*len(industry))
+        for ticker in data_dict[industry]:
+            print(ticker)
+
+
+if __name__ == '__main__':
+    url = 'http://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
+    scraped_data = get_stocks(url)
+    output_data(scraped_data)
diff --git a/readme.md b/readme.md
index ad5d157..2e00620 100644
--- a/readme.md
+++ b/readme.md
@@ -24,4 +24,5 @@
 1. **22_git_tag.py**: Create Git Tag based on a commit
 1. **23_flask_session_test.py**: Just a simple app to see if the sessions are working
 1. **24_sql2csv.py**: SQL to CSV.
-1. **25_ip2geolocation.py**: Given a CSV file with an ip address (see sample - *25_sample_csv.csv*), return the geolocation based on the ip.
\ No newline at end of file
+1. **25_ip2geolocation.py**: Given a CSV file with an ip address (see sample - *25_sample_csv.csv*), return the geolocation based on the ip.
+1. **26_stock_scraper.py**: Scrape the S&P 500 Companies list from Wikipedia, then output the data.
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..d1a3d68
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,10 @@
+Flask==0.10.1
+Jinja2==2.7.3
+MarkupSafe==0.23
+Wand==0.4.0
+Werkzeug==0.10.4
+hurry.filesize==0.9
+itsdangerous==0.24
+lxml==3.4.4
+numpy==1.9.2
+requests==2.7.0