40 lines
1.5 KiB
Python
40 lines
1.5 KiB
Python
|
from typing import Any
|
||
|
from selenium import webdriver
|
||
|
from selenium.webdriver.chrome.options import Options
|
||
|
from selenium.webdriver.chrome.service import Service
|
||
|
from webdriver_manager.chrome import ChromeDriverManager
|
||
|
|
||
|
from DataCollectingScraper.DataScrapers.IDataScraper import IDataScraper, IDataScraperState
|
||
|
|
||
|
class DataCollectingScraper:
    """Run an ``IDataScraper`` implementation against a Selenium Chrome session.

    The instance owns a single Chrome driver at a time. Calling the instance
    repeatedly invokes ``scraper_impl.scrap_data`` until the state it returns
    reports ``finished()``; after every unfinished pass the browser session is
    replaced with a fresh one.
    """

    # Project / third-party types are annotated as strings (forward references)
    # so that they are not evaluated when the class body is executed.
    def __init__(self, scraper_impl: "IDataScraper") -> None:
        """Store the scraper implementation and start a Chrome driver.

        Args:
            scraper_impl: Object providing ``get_home_page()`` and
                ``scrap_data(driver, state)``.
        """
        self.scraper_impl: "IDataScraper" = scraper_impl
        # Stays None until instantiate_driver() has created a session.
        self.driver: "webdriver.Chrome | None" = None
        self.instantiate_driver()

    @staticmethod
    def get_driver_options() -> list:
        """Return the command-line arguments passed to Chrome."""
        return [
            # Headless mode is intentionally switched off; re-enable the next
            # line to run Chrome with the "--headless" argument.
            # "--headless",
            "--no-sandbox",
            "--disable-dev-shm-usage",
        ]

    def _release_driver(self) -> None:
        """Quit the current browser session, if there is one, and forget it."""
        # Detach first so the attribute is cleared even if quit() raises.
        stale = getattr(self, "driver", None)
        self.driver = None
        if stale is not None:
            stale.quit()

    def instantiate_driver(self) -> None:
        """Create a new Chrome driver and store it in ``self.driver``.

        Any driver already held by this instance is quit first; otherwise each
        call would leave the previous Chrome/chromedriver processes running.
        """
        self._release_driver()

        options = Options()
        for argument in DataCollectingScraper.get_driver_options():
            options.add_argument(argument)

        self.driver = webdriver.Chrome(
            service=Service(ChromeDriverManager().install()),
            options=options,
        )

    def __call__(self, *args: Any, **kwds: Any) -> None:
        """Scrape until the scraper implementation reports it is finished.

        The state returned by each ``scrap_data`` call is handed back to the
        next call. Positional and keyword arguments are accepted but unused.
        Does nothing when no driver is available.
        """
        if self.driver is None:
            return

        state: "IDataScraperState" = None
        while True:
            print(f"Calling DataCollectingScraper for {str(self.scraper_impl)} and page {self.scraper_impl.get_home_page()}")
            state = self.scraper_impl.scrap_data(self.driver, state)
            if state.finished():
                break
            # Unfinished pass: continue with a fresh browser session
            # (instantiate_driver() disposes of the old one).
            self.instantiate_driver()
|