project_python_rynekNieruch.../home_pricing/DataCollectingScraper/DataCollectingScraper.py

40 lines
1.5 KiB
Python
Raw Permalink Normal View History

2024-02-26 16:54:44 +01:00
from typing import Any
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from DataCollectingScraper.DataScrapers.IDataScraper import IDataScraper, IDataScraperState
class DataCollectingScraper:
    """Run an ``IDataScraper`` implementation against a Chrome WebDriver.

    The instance holds one Chrome session at a time in ``self.driver``.
    Calling the instance invokes the scraper repeatedly; whenever a pass
    returns a state that is not finished, the browser session is replaced
    before the next pass.
    """

    def __init__(self, scraper_impl: "IDataScraper") -> None:
        """Store the scraper implementation and start the first session.

        Args:
            scraper_impl: Object whose ``scrap_data(driver, state)`` and
                ``get_home_page()`` methods are used by ``__call__``.
        """
        self.scraper_impl: IDataScraper = scraper_impl
        # None only until instantiate_driver() assigns the first session.
        self.driver: webdriver.Chrome = None
        self.instantiate_driver()

    @staticmethod
    def get_driver_options() -> list:
        """Return the command-line arguments passed to Chrome."""
        return [
            # "--headless",  # currently disabled
            "--no-sandbox",
            "--disable-dev-shm-usage",
        ]

    def _quit_driver(self) -> None:
        """Quit the session held in ``self.driver``, if any, and clear it."""
        stale = getattr(self, "driver", None)
        self.driver = None
        if stale is not None:
            stale.quit()

    def instantiate_driver(self) -> None:
        """Start a new Chrome session and store it in ``self.driver``.

        A session that is already held is quit first; otherwise every call
        would leave the previous browser process running with nothing left
        referencing it.
        """
        self._quit_driver()

        options = Options()
        for argument in self.get_driver_options():
            options.add_argument(argument)

        self.driver = webdriver.Chrome(
            service=Service(ChromeDriverManager().install()),
            options=options,
        )

    def __call__(self, *args: Any, **kwds: Any) -> None:
        """Run the scraper until the state it returns reports finished.

        The first pass receives ``None`` as its state; each later pass
        receives the state returned by the previous one. After an unfinished
        pass the browser session is replaced (the old one is quit) before
        retrying. The positional and keyword arguments are accepted but not
        used. Does nothing when no driver is available.
        """
        if self.driver is None:
            return

        state: IDataScraperState = None
        while True:
            print(f"Calling DataCollectingScraper for {str(self.scraper_impl)} and page {self.scraper_impl.get_home_page()}")
            state = self.scraper_impl.scrap_data(self.driver, state)
            if state.finished():
                break
            # NOTE(review): presumably a fresh browser is needed to resume
            # scraping from the returned state - confirm with the scrapers.
            self.instantiate_driver()