fix: add error handling and increase timeout for images
Signed-off-by: paprykdev <58005447+paprykdev@users.noreply.github.com>
This commit is contained in:
parent
cab794cbef
commit
e58f5e0cad
@ -3,6 +3,7 @@ import requests
|
|||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
from playwright.async_api import async_playwright
|
from playwright.async_api import async_playwright
|
||||||
|
from sys import exit
|
||||||
|
|
||||||
|
|
||||||
class MonetScraper:
|
class MonetScraper:
|
||||||
@ -67,17 +68,23 @@ class MonetScraper:
|
|||||||
for e in el:
|
for e in el:
|
||||||
self.hrefs.append(await e.get_attribute('href'))
|
self.hrefs.append(await e.get_attribute('href'))
|
||||||
|
|
||||||
async def get_image(self):
|
async def get_image(self, href):
|
||||||
image = "null"
|
image = "null"
|
||||||
i = 0
|
i = 0
|
||||||
while image == "null" and i < 10:
|
self.page.set_default_timeout(10000)
|
||||||
|
while image == "null" and i < 30:
|
||||||
|
try:
|
||||||
image = await self.find_el(
|
image = await self.find_el(
|
||||||
".not-full-screen-image-container > img")
|
".not-full-screen-image-container > img")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error: {e}\n\nOn page: {href}")
|
||||||
|
exit(1)
|
||||||
image = await image.get_attribute('srcset')
|
image = await image.get_attribute('srcset')
|
||||||
image = image.split(",")[0].split(" ")[0]
|
image = image.split(",")[0].split(" ")[0]
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
|
self.page.set_default_timeout(5000)
|
||||||
return image
|
return image
|
||||||
|
|
||||||
def curl_image(self, image, title, id):
|
def curl_image(self, image, title, id):
|
||||||
@ -160,7 +167,7 @@ class MonetScraper:
|
|||||||
async def get_data(self):
|
async def get_data(self):
|
||||||
for index, href in enumerate(self.hrefs):
|
for index, href in enumerate(self.hrefs):
|
||||||
await self.go_to(f"{self.base_url}{href}")
|
await self.go_to(f"{self.base_url}{href}")
|
||||||
image = await self.get_image()
|
image = await self.get_image(href)
|
||||||
title = await self.get_title()
|
title = await self.get_title()
|
||||||
get_info = await self.get_info()
|
get_info = await self.get_info()
|
||||||
provenance = await self.get_provenance()
|
provenance = await self.get_provenance()
|
||||||
|
Loading…
Reference in New Issue
Block a user