93 lines
3.0 KiB
Python
93 lines
3.0 KiB
Python
import logging
from io import BytesIO
from pathlib import Path
from typing import Any

import PIL
import PIL.Image
import requests

from settings import IMAGE_TARGET_SIZE
|
|
|
|
# Directory passed as the save target when this module is run as a script.
TECHNICAL_DIR = "technical"

# Subreddits whose top posts are scanned for images.
SUBREDDITS = ["shittytechnicals"]
|
|
|
|
|
|
def get_top_json(
    subreddit: str, after: str | None = None, timeout: float = 30.0
) -> dict[str, Any]:
    """Get the all-time top posts from a subreddit

    Args:
        subreddit (str): The subreddit to get the top posts from
        after (str | None, optional): Pagination cursor sent as the ``after``
            query parameter. Omitted from the request when None, which asks
            for the first page. Defaults to None.
        timeout (float, optional): Seconds to wait on the server before the
            request is abandoned. Defaults to 30.0.

    Returns:
        dict[str, Any]: The JSON response from the Reddit API

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: If the request fails or times out.
    """
    url = f"https://www.reddit.com/r/{subreddit}/top.json"
    headers = {"User-Agent": "Mozilla/5.0"}
    # "t=all" selects the all-time ranking; keeping it in params lets
    # requests build and encode the whole query string in one place.
    params = {"t": "all"}
    if after is not None:
        params["after"] = after
    # Without a timeout requests waits indefinitely on a stalled connection.
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()
|
|
|
|
|
|
def _fetch_image_bytes(url: str, timeout: float = 30.0) -> bytes | None:
    """Download ``url`` and return its body, or None (after logging) on failure."""
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        logging.warning("Failed to download %s: %s", url, e)
        return None
    if not response.ok:
        logging.warning(
            "Failed to download %s: %s, %s",
            url,
            response.status_code,
            response.reason,
        )
        return None
    return response.content


def save_subreddit_images(
    subreddit: str, target: str, sz: int, limit: int = 100
) -> int:
    """Save the images from the top posts of a subreddit

    Each preview image of each top post is downloaded, resized to a
    ``sz`` x ``sz`` square and written into ``target`` under the last path
    component of the preview URL. When the preview image cannot be
    downloaded, the post's thumbnail is tried instead. Images that cannot be
    downloaded, decoded or saved are logged and skipped.

    Args:
        subreddit (str): The subreddit to get the images from
        target (str): The directory to save the images to. Created if it
            does not exist.
        sz (int): The size to resize the images to
        limit (int, optional): The maximum number of images to download.
            Defaults to 100.

    Returns:
        int: The number of images downloaded. May be smaller than ``limit``
            when the listing runs out of posts.
    """
    out_dir = Path(target)
    out_dir.mkdir(parents=True, exist_ok=True)

    gotten = 0
    after = None
    while gotten < limit:
        listing = get_top_json(subreddit, after)["data"]
        for post in listing["children"]:
            post_data = post["data"]
            preview = post_data.get("preview")
            if preview is None:
                continue
            for entry in preview["images"]:
                # Enforce the limit per image, not just per page.
                if gotten >= limit:
                    return gotten
                # The query string is dropped so the file name is clean.
                url: str = entry["source"]["url"].split("?")[0]
                content = _fetch_image_bytes(url)
                if content is None:
                    # Fall back to the post's thumbnail.
                    thumb = post_data.get("thumbnail")
                    if thumb is None:
                        continue
                    content = _fetch_image_bytes(thumb)
                    if content is None:
                        continue
                try:
                    with PIL.Image.open(BytesIO(content)) as img:
                        img.resize((sz, sz)).save(out_dir / url.split("/")[-1])
                except (OSError, ValueError) as e:
                    # Not an image, or a mode/extension the encoder rejects:
                    # skip this one instead of aborting the whole run.
                    logging.warning("Failed to save %s: %s", url, e)
                    continue
                gotten += 1
        after = listing.get("after")
        if after is None:
            # No further pages; requesting again with after=None would
            # restart from the first page and loop forever.
            break
    return gotten
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: process every configured subreddit with a limit of
    # 512 images each.
    for subreddit in SUBREDDITS:
        save_subreddit_images(
            subreddit,
            target=TECHNICAL_DIR,
            sz=IMAGE_TARGET_SIZE,
            limit=512,
        )
|