From 6da93f41f0eba7b2e2c1a10efcbb6df49c1f6a0d Mon Sep 17 00:00:00 2001
From: Damian Michalski
Date: Sun, 19 Jan 2020 03:23:04 +0100
Subject: [PATCH] OtoMoto webscrapper sourcecode added

---
 Car4You_webscrapper/pom.xml                   |  34 ++++
 .../src/main/java/CrawlerThread.java          | 184 ++++++++++++++++++
 .../src/main/java/ThreadManager.java          |  28 +++
 .../src/main/java/dbService.java              | 132 +++++++++++++
 4 files changed, 378 insertions(+)
 create mode 100644 Car4You_webscrapper/pom.xml
 create mode 100644 Car4You_webscrapper/src/main/java/CrawlerThread.java
 create mode 100644 Car4You_webscrapper/src/main/java/ThreadManager.java
 create mode 100644 Car4You_webscrapper/src/main/java/dbService.java

diff --git a/Car4You_webscrapper/pom.xml b/Car4You_webscrapper/pom.xml
new file mode 100644
--- /dev/null
+++ b/Car4You_webscrapper/pom.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>groupId</groupId>
+    <artifactId>Car4You_webscrapper</artifactId>
+    <version>1.0-SNAPSHOT</version>
+
+    <!-- The sources contain Polish string literals used as lookup keys and use try-with-resources. -->
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <maven.compiler.source>1.8</maven.compiler.source>
+        <maven.compiler.target>1.8</maven.compiler.target>
+    </properties>
+
+    <dependencies>
+        <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.12.1</version>
+        </dependency>
+
+        <!-- https://mvnrepository.com/artifact/postgresql/postgresql -->
+        <dependency>
+            <groupId>postgresql</groupId>
+            <artifactId>postgresql</artifactId>
+            <version>9.1-901-1.jdbc4</version>
+        </dependency>
+    </dependencies>
+
+</project>
diff --git a/Car4You_webscrapper/src/main/java/CrawlerThread.java b/Car4You_webscrapper/src/main/java/CrawlerThread.java
new file mode 100644
--- /dev/null
+++ b/Car4You_webscrapper/src/main/java/CrawlerThread.java
@@ -0,0 +1,184 @@
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Crawls the OtoMoto listing pages, reads the parameters of every offer that is
+ * not marked as promoted and hands them to {@link dbService#insert}.
+ */
+public class CrawlerThread extends Thread {
+
+    /** Listing address; the page number is appended to it. */
+    private static final String PAGE_URL = "https://www.otomoto.pl/osobowe/?search%5Bfilter_enum_has_registration%5D=1&search%5Bfilter_enum_damaged%5D=0&search%5Bfilter_enum_registered%5D=1&search%5Bfilter_enum_no_accident%5D=1&search%5Border%5D=filter_float_price%3Aasc&search%5Bbrand_program_id%5D%5B0%5D=&search%5Bcountry%5D=&page=";
+    /** Number of the last listing page visited by one run. */
+    private static final int LAST_PAGE = 500;
+    /** How many times one address is requested before it is given up on. */
+    private static final int MAX_ATTEMPTS = 5;
+    /** Pause between two attempts for the same address, in milliseconds. */
+    private static final long RETRY_DELAY_MS = 2000L;
+    /** Stored in place of an optional text parameter the offer does not list. */
+    private static final String UNSPECIFIED = "nieokreślono";
+
+    public CrawlerThread() {
+    }
+
+    /**
+     * Walks the listing pages in order. A page or an offer that cannot be
+     * downloaded or parsed is reported on standard output and skipped.
+     */
+    @Override
+    public void run() {
+        for (int page = 1; page <= LAST_PAGE && !isInterrupted(); page++) {
+            System.out.println("Strona nr: " + page);
+            String targetUrl = PAGE_URL + page;
+            System.out.println("Link: " + targetUrl);
+
+            Document listing = fetch(targetUrl);
+            if (listing == null) {
+                // Retries are bounded: skip the page rather than loop forever.
+                continue;
+            }
+
+            Elements links = listing.select("a.offer-title__link");
+            for (Element link : links) {
+                if (isPromoted(link)) {
+                    System.out.println("Oferta promowana. Pomijam. " + link.text());
+                } else {
+                    processOffer(link.attr("href"));
+                }
+            }
+        }
+    }
+
+    /**
+     * Downloads and parses a page, making at most {@link #MAX_ATTEMPTS} attempts.
+     *
+     * @param url address of the page
+     * @return the parsed page, or {@code null} when every attempt failed or the
+     *         thread was interrupted
+     */
+    private static Document fetch(String url) {
+        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
+            if (Thread.currentThread().isInterrupted()) {
+                return null;
+            }
+            try {
+                return Jsoup.connect(url).get();
+            } catch (IOException e) {
+                System.out.println("Nie udalo sie polaczyc. " + e.getMessage());
+            }
+            if (attempt < MAX_ATTEMPTS) {
+                try {
+                    Thread.sleep(RETRY_DELAY_MS);
+                } catch (InterruptedException e) {
+                    // Restore the flag so that run() stops as well.
+                    Thread.currentThread().interrupt();
+                    return null;
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Tells whether a listing entry is marked as promoted, i.e. whether the first
+     * {@code div} under the fourth ancestor of the title link contains a
+     * {@code span}. An entry without that structure counts as not promoted.
+     */
+    private static boolean isPromoted(Element link) {
+        Element container = link;
+        for (int level = 0; level < 4 && container != null; level++) {
+            container = container.parent();
+        }
+        if (container == null) {
+            return false;
+        }
+        Element firstDiv = container.selectFirst("div");
+        return firstDiv != null && firstDiv.selectFirst("span") != null;
+    }
+
+    /**
+     * Downloads one offer page, extracts its price and parameters and stores
+     * them. Any failure is reported and ends the processing of this offer only.
+     */
+    private static void processOffer(String url) {
+        System.out.println("Pobieram: " + url);
+        try {
+            Document offer = fetch(url);
+            if (offer == null) {
+                return;
+            }
+
+            Element priceElement = offer.selectFirst("span.offer-price__number");
+            if (priceElement == null) {
+                throw new IllegalStateException("brak ceny");
+            }
+            Integer price = parsePrice(priceElement.text());
+
+            // collect the parameter labels and values into a map
+            Map<String, String> params = new HashMap<>();
+            Elements items = offer.select("ul.offer-params__list").select("li.offer-params__item");
+            for (Element item : items) {
+                String key = item.select("span.offer-params__label").text();
+                String value = item.select("div.offer-params__value").text();
+                params.put(key, value);
+            }
+
+            String brand = params.get("Marka pojazdu");
+            String model = params.get("Model pojazdu");
+            String version = orUnspecified(params.get("Wersja"));
+            // A missing year is stored as 0 instead of rejecting the offer.
+            String yearText = params.get("Rok produkcji");
+            Integer year = yearText == null ? 0 : Integer.valueOf(yearText.trim());
+            Integer enginePower = Integer.valueOf(required(params, "Moc").split(" ")[0]);
+            // Drop the spaces and the three trailing unit characters.
+            String capacityText = required(params, "Pojemność skokowa").replaceAll(" ", "");
+            Double engineCapacity = Double.valueOf(capacityText.substring(0, capacityText.length() - 3));
+            String fuel = params.get("Rodzaj paliwa");
+            String transmission = orUnspecified(params.get("Skrzynia biegów"));
+            String drive = orUnspecified(params.get("Napęd"));
+            String bodyType = params.get("Typ");
+            Integer doors = Integer.valueOf(required(params, "Liczba drzwi"));
+            Integer seats = Integer.valueOf(required(params, "Liczba miejsc"));
+
+            dbService.insert(brand, model, version, year, enginePower, engineCapacity, fuel, transmission, drive, bodyType, doors, seats, price);
+        } catch (Exception e) {
+            // Boundary of one offer: nothing thrown here may stop the crawl.
+            System.out.println("Błąd pobierania danych!: " + e.getMessage());
+        }
+    }
+
+    /**
+     * Reads the whole-unit amount from a price label: everything from the first
+     * comma on is dropped and the digits that remain are parsed, so separators
+     * and the currency suffix do not matter.
+     *
+     * @param label text of the price element
+     * @return the amount without its fractional part
+     * @throws NumberFormatException when no digits precede the comma
+     */
+    static int parsePrice(String label) {
+        int comma = label.indexOf(',');
+        String whole = comma >= 0 ? label.substring(0, comma) : label;
+        return Integer.parseInt(whole.replaceAll("[^0-9]", ""));
+    }
+
+    /** Returns the value of a parameter the offer has to list, or fails naming the key. */
+    private static String required(Map<String, String> params, String key) {
+        String value = params.get(key);
+        if (value == null) {
+            throw new IllegalStateException("brak parametru: " + key);
+        }
+        return value;
+    }
+
+    /** Replaces a missing optional value with {@link #UNSPECIFIED}. */
+    private static String orUnspecified(String value) {
+        return value == null ? UNSPECIFIED : value;
+    }
+}
diff --git a/Car4You_webscrapper/src/main/java/ThreadManager.java b/Car4You_webscrapper/src/main/java/ThreadManager.java
new file mode 100644
--- /dev/null
+++ b/Car4You_webscrapper/src/main/java/ThreadManager.java
@@ -0,0 +1,28 @@
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Entry point: creates the crawler threads and starts them.
+ */
+public class ThreadManager {
+
+    /**
+     * Number of crawler threads. Every {@link CrawlerThread} walks the same
+     * listing pages, so a value above 1 only repeats the same work.
+     */
+    private static final int THREAD_COUNT = 1;
+
+    public static void main(String[] args) {
+        List<CrawlerThread> threads = new ArrayList<>();
+
+        // create the threads
+        for (int i = 0; i < THREAD_COUNT; i++) {
+            threads.add(new CrawlerThread());
+        }
+
+        // start the threads
+        for (CrawlerThread thread : threads) {
+            thread.start();
+        }
+    }
+}
diff --git a/Car4You_webscrapper/src/main/java/dbService.java b/Car4You_webscrapper/src/main/java/dbService.java
new file mode 100644
--- /dev/null
+++ b/Car4You_webscrapper/src/main/java/dbService.java
@@ -0,0 +1,132 @@
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+/**
+ * Stores scraped offers in the {@code car} table: one row per car
+ * configuration, carrying the price statistics of the offers seen for it.
+ */
+public class dbService {
+    // JDBC driver name and database URL
+    static final String JDBC_DRIVER = "org.postgresql.Driver";
+    static final String DB_URL = "";
+
+    // Database credentials
+    static final String USER = "";
+    static final String PASS = "";
+
+    private static final String SELECT_SQL = ""
+            + "SELECT id, price_from, price_to, price_sum, counter "
+            + "FROM car "
+            + "WHERE "
+            + "brand = ? AND "
+            + "model = ? AND "
+            + "version = ? AND "
+            + "engine_power = ? AND "
+            + "engine_capacity = ? AND "
+            + "fuel = ? AND "
+            + "transmission = ? AND "
+            + "drive = ? AND "
+            + "body_type = ? AND "
+            + "doors = ? AND "
+            + "seats = ?";
+
+    private static final String UPDATE_SQL = ""
+            + "UPDATE car "
+            + "SET price_from = ?, price_to = ?, price_sum = ?, avarage = ?, counter = ? "
+            + "WHERE id = ?";
+
+    private static final String INSERT_SQL = ""
+            + "INSERT INTO car (brand, model, version, year, engine_power, engine_capacity, fuel, transmission, drive, body_type, doors, seats, price_from, price_to, price_sum, counter, avarage) "
+            + "values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
+
+    /**
+     * Records one offer. When a row with the same brand, model, version, engine
+     * power and capacity, fuel, transmission, drive, body type, doors and seats
+     * exists, its price range, sum, counter and average are updated; otherwise a
+     * new row is added. SQL failures are reported and not propagated.
+     * The numeric arguments are unboxed and therefore must not be {@code null}.
+     */
+    public static void insert(String brand, String model, String version, Integer year, Integer enginePower, Double engineCapacity, String fuel, String transmission, String drive, String bodyType, Integer doors, Integer seats, Integer price) {
+        // try-with-resources closes the connection, statement and result set on every path
+        try (Connection conn = DriverManager.getConnection(DB_URL, USER, PASS);
+             PreparedStatement select = conn.prepareStatement(SELECT_SQL)) {
+            // first check whether such a car is already stored
+            select.setString(1, brand);
+            select.setString(2, model);
+            select.setString(3, version);
+            select.setInt(4, enginePower);
+            select.setDouble(5, engineCapacity);
+            select.setString(6, fuel);
+            select.setString(7, transmission);
+            select.setString(8, drive);
+            select.setString(9, bodyType);
+            select.setInt(10, doors);
+            select.setInt(11, seats);
+
+            try (ResultSet existing = select.executeQuery()) {
+                if (existing.next()) {
+                    updateStatistics(conn, existing, brand, price);
+                } else {
+                    insertCar(conn, brand, model, version, year, enginePower, engineCapacity, fuel, transmission, drive, bodyType, doors, seats, price);
+                }
+            }
+        } catch (SQLException se) {
+            se.printStackTrace();
+        }
+    }
+
+    /** Folds a new price into the statistics of the row {@code existing} is positioned on. */
+    private static void updateStatistics(Connection conn, ResultSet existing, String brand, int price) {
+        try {
+            long id = existing.getLong("id");
+            // Each bound moves only when the new price lies beyond it.
+            int minPrice = Math.min(existing.getInt("price_from"), price);
+            int maxPrice = Math.max(existing.getInt("price_to"), price);
+            long priceSum = existing.getLong("price_sum") + price;
+            long counter = existing.getLong("counter") + 1;
+            double average = priceSum / (double) counter;
+
+            // only the five statistics columns change
+            try (PreparedStatement update = conn.prepareStatement(UPDATE_SQL)) {
+                update.setInt(1, minPrice);
+                update.setInt(2, maxPrice);
+                update.setLong(3, priceSum);
+                update.setDouble(4, average);
+                update.setLong(5, counter);
+                update.setLong(6, id);
+                update.executeUpdate();
+            }
+            System.out.println("Zauktualizowano: " + brand);
+        } catch (SQLException e) {
+            System.out.println("Błąd podczas aktualizacji ogłoszenia: " + e.getMessage());
+        }
+    }
+
+    /** Adds the first row for a car; all price statistics start from {@code price}. */
+    private static void insertCar(Connection conn, String brand, String model, String version, int year, int enginePower, double engineCapacity, String fuel, String transmission, String drive, String bodyType, int doors, int seats, int price) throws SQLException {
+        try (PreparedStatement insert = conn.prepareStatement(INSERT_SQL)) {
+            insert.setString(1, brand);
+            insert.setString(2, model);
+            insert.setString(3, version);
+            insert.setInt(4, year);
+            insert.setInt(5, enginePower);
+            insert.setDouble(6, engineCapacity);
+            insert.setString(7, fuel);
+            insert.setString(8, transmission);
+            insert.setString(9, drive);
+            insert.setString(10, bodyType);
+            insert.setInt(11, doors);
+            insert.setInt(12, seats);
+            insert.setInt(13, price);
+            insert.setInt(14, price);
+            insert.setInt(15, price);
+            insert.setInt(16, 1);
+            insert.setDouble(17, price);
+            insert.executeUpdate();
+        }
+        System.out.println("Dodano: " + brand);
+    }
+}