OtoMoto webscrapper sourcecode added
This commit is contained in:
parent
33f95c77bb
commit
6da93f41f0
27
Car4You_webscrapper/pom.xml
Normal file
27
Car4You_webscrapper/pom.xml
Normal file
@ -0,0 +1,27 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build descriptor for the OtoMoto web scraper.
  Dependencies: jsoup (HTML fetching/parsing) and the PostgreSQL JDBC driver.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>groupId</groupId>
    <artifactId>Car4You_webscrapper</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <!--
          The Java sources contain non-ASCII (Polish) string literals that are used as
          lookup keys for scraped parameters, so the source encoding must not depend on
          the platform default.
        -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Pin the language level instead of relying on the compiler plugin's default. -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>

    <dependencies>
        <dependency>
            <!-- jsoup HTML parser library @ https://jsoup.org/ -->
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.12.1</version>
        </dependency>

        <!--
          PostgreSQL JDBC driver. The legacy "postgresql:postgresql" coordinates
          (last used for the 9.1 driver) were replaced by "org.postgresql:postgresql".
          https://mvnrepository.com/artifact/org.postgresql/postgresql
        -->
        <dependency>
            <groupId>org.postgresql</groupId>
            <artifactId>postgresql</artifactId>
            <version>42.2.5</version>
        </dependency>

    </dependencies>
</project>
|
122
Car4You_webscrapper/src/main/java/CrawlerThread.java
Normal file
122
Car4You_webscrapper/src/main/java/CrawlerThread.java
Normal file
@ -0,0 +1,122 @@
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Hashtable;
|
||||
|
||||
public class CrawlerThread extends Thread {
|
||||
|
||||
public CrawlerThread(){
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
|
||||
String pageUrl = "https://www.otomoto.pl/osobowe/?search%5Bfilter_enum_has_registration%5D=1&search%5Bfilter_enum_damaged%5D=0&search%5Bfilter_enum_registered%5D=1&search%5Bfilter_enum_no_accident%5D=1&search%5Border%5D=filter_float_price%3Aasc&search%5Bbrand_program_id%5D%5B0%5D=&search%5Bcountry%5D=&page=";
|
||||
for (int i = 1; i <= 500; i++) {
|
||||
Document doc = null;
|
||||
System.out.println("Strona nr: " + i);
|
||||
String targetUrl = pageUrl + Integer.valueOf(i);
|
||||
System.out.println("Link: " + targetUrl);
|
||||
Boolean success = false; //flaga do sprawdzania czy udało sie połączyc
|
||||
while (!success) {
|
||||
success = true;
|
||||
try {
|
||||
doc = Jsoup.connect(targetUrl).get();
|
||||
} catch (IOException e) {
|
||||
System.out.println("Nie udalo sie polaczyc. " + e.getMessage());
|
||||
success = false;
|
||||
}
|
||||
}
|
||||
|
||||
Elements links = doc.select("a.offer-title__link");
|
||||
|
||||
for (Element element : links) {
|
||||
Element photoDiv = element.parent().parent().parent().parent().selectFirst("div").selectFirst("span");
|
||||
// System.out.println("Promowana: " + photoDiv);
|
||||
if (photoDiv == null) { //todo sprawdzać czy oferta jest promowana
|
||||
String url = element.attr("href");
|
||||
System.out.println("Pobieram: " + url);
|
||||
Document pageDoc = null;
|
||||
success = false; //flaga do sprawdzania czy udało sie połączyc
|
||||
while (!success) {
|
||||
success = true;
|
||||
try {
|
||||
pageDoc = Jsoup.connect(url).get();
|
||||
} catch (IOException e) {
|
||||
System.out.println("Nie udalo sie polaczyc. " + e.getMessage());
|
||||
success = false;
|
||||
}
|
||||
}
|
||||
Elements offerParamsList = pageDoc.select("ul.offer-params__list").select("li.offer-params__item");
|
||||
Hashtable<String, String> paramsHashtable = new Hashtable<String, String>();
|
||||
|
||||
String priceString = pageDoc.select("span.offer-price__number").first().text();
|
||||
if(!priceString.contains(",")) {
|
||||
priceString = priceString.substring(0, priceString.length() - 4);
|
||||
priceString = priceString.replaceAll(" ", "");
|
||||
}else{
|
||||
priceString = priceString.replaceAll(",", "");
|
||||
priceString = priceString.substring(0, priceString.length()-2);
|
||||
priceString = priceString.substring(0, priceString.length() - 4);
|
||||
priceString = priceString.replaceAll(" ", "");
|
||||
}
|
||||
Integer price = Integer.valueOf(priceString);
|
||||
|
||||
for (Element param : offerParamsList) { //wyciąganie wartości parametrów do tablicy
|
||||
String key = param.select("span.offer-params__label").text();
|
||||
String value = param.select("div.offer-params__value").text();
|
||||
paramsHashtable.put(key, value);
|
||||
}
|
||||
|
||||
try {
|
||||
String brand = paramsHashtable.get("Marka pojazdu");
|
||||
String model = paramsHashtable.get("Model pojazdu");
|
||||
String version = paramsHashtable.get("Wersja");
|
||||
Integer year = Integer.valueOf(paramsHashtable.get("Rok produkcji"));
|
||||
Integer enginePower = Integer.valueOf(paramsHashtable.get("Moc").split(" ")[0]);
|
||||
String engineCapacityString = paramsHashtable.get("Pojemność skokowa").replaceAll(" ", "");
|
||||
engineCapacityString = engineCapacityString.substring(0, engineCapacityString.length() - 3);
|
||||
Double engineCapacity = Double.valueOf(engineCapacityString);
|
||||
String fuel = paramsHashtable.get("Rodzaj paliwa");
|
||||
String transmission = paramsHashtable.get("Skrzynia biegów");
|
||||
String drive = paramsHashtable.get("Napęd");
|
||||
String bodyType = paramsHashtable.get("Typ");
|
||||
Integer doors = Integer.valueOf(paramsHashtable.get("Liczba drzwi"));
|
||||
Integer seats = Integer.valueOf(paramsHashtable.get("Liczba miejsc"));
|
||||
|
||||
if(year == null) year = 0;
|
||||
if(version == null) version = "nieokreślono";
|
||||
if(drive == null) drive = "nieokreślono";
|
||||
if(transmission == null) transmission = "nieokreślono";
|
||||
|
||||
// System.out.println("Samochód: " +
|
||||
// "\nmarka: " + brand +
|
||||
// "\nmodel: " + model +
|
||||
// "\nwersja: " + version +
|
||||
// "\nrok produkcji: " + year +
|
||||
// "\nmoc silnika: " + enginePower +
|
||||
// "\npojemnosc skokoa: " + engineCapacity +
|
||||
// "\nrodzaj paliwa; " + fuel +
|
||||
// "\nskrzynia biegow: " + transmission +
|
||||
// "\nnapęd: " + drive +
|
||||
// "\ntyp nadwozia: " + bodyType +
|
||||
// "\nliczba drzwi: " + doors +
|
||||
// "\nliczba miejsc: " + seats +
|
||||
// "\ncena: " + price);
|
||||
|
||||
dbService.insert(brand, model, version, year, enginePower, engineCapacity, fuel, transmission, drive, bodyType, doors, seats, price);
|
||||
} catch (Exception e) {
|
||||
System.out.println("Błąd pobierania danych!: " + e.getMessage());
|
||||
}
|
||||
// break;
|
||||
}else{
|
||||
System.out.println("Oferta promowana. Pomijam. "+element.text());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
21
Car4You_webscrapper/src/main/java/ThreadManager.java
Normal file
21
Car4You_webscrapper/src/main/java/ThreadManager.java
Normal file
@ -0,0 +1,21 @@
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class ThreadManager {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
ArrayList<CrawlerThread> threads = new ArrayList();
|
||||
|
||||
//tworzenie watkow
|
||||
for(int i = 0; i < 1; i++){
|
||||
threads.add(new CrawlerThread());
|
||||
}
|
||||
|
||||
//uruchomienie watkow
|
||||
for(CrawlerThread t:threads){
|
||||
t.start();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
115
Car4You_webscrapper/src/main/java/dbService.java
Normal file
115
Car4You_webscrapper/src/main/java/dbService.java
Normal file
@ -0,0 +1,115 @@
|
||||
import java.sql.*;
|
||||
|
||||
/**
 * Persists scraped car offers in the {@code car} table.
 *
 * <p>Offers describing the same car configuration are aggregated into one row that
 * tracks the lowest price, the highest price, the price sum, the number of offers seen
 * and the average price.
 */
public class dbService {
    // JDBC driver name and database URL
    // Class name of the PostgreSQL JDBC driver (the previous value named a non-existent class).
    static final String JDBC_DRIVER = "org.postgresql.Driver";
    static final String DB_URL = "";

    // Database credentials
    static final String USER = "";
    static final String PASS = "";

    /** Finds an existing row for the same car configuration. */
    private static final String SELECT_SQL = "" +
            "SELECT * " +
            "FROM car " +
            "WHERE " +
            "brand = ? AND " +
            "model = ? AND " +
            "version = ? AND " +
            "engine_power = ? AND " +
            "engine_capacity = ? AND " +
            "fuel = ? AND " +
            "transmission = ? AND " +
            "drive = ? AND " +
            "body_type = ? AND " +
            "doors = ? AND " +
            "seats = ?";

    /** Updates only the five aggregated price columns of an existing row. */
    private static final String UPDATE_SQL = "" +
            "UPDATE car " +
            "SET price_from = ?, price_to = ?, price_sum = ?, avarage = ?, counter = ? WHERE id = ?";

    /** Creates the first row for a car configuration. */
    private static final String INSERT_SQL =
            "INSERT INTO car (brand, model, version, year, engine_power, engine_capacity, fuel, transmission, drive, body_type, doors, seats, price_from, price_to, price_sum, counter, avarage) " +
            "values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";

    /**
     * Records one scraped offer. If a row with the same configuration already exists
     * (the production year is not part of the lookup), its price statistics are updated;
     * otherwise a new row is inserted with the offer price as the initial statistics.
     *
     * <p>SQL errors are reported on the console and not propagated. All JDBC resources
     * are released before the method returns, on every path.
     *
     * @param brand          vehicle brand
     * @param model          vehicle model
     * @param version        model version
     * @param year           production year; must not be {@code null}
     * @param enginePower    engine power; must not be {@code null}
     * @param engineCapacity engine displacement; must not be {@code null}
     * @param fuel           fuel type
     * @param transmission   gearbox type
     * @param drive          drive type
     * @param bodyType       body type
     * @param doors          number of doors; must not be {@code null}
     * @param seats          number of seats; must not be {@code null}
     * @param price          offer price; must not be {@code null}
     * @throws NullPointerException if a numeric argument is {@code null} when it is bound
     */
    public static void insert(String brand, String model, String version, Integer year, Integer enginePower, Double engineCapacity, String fuel, String transmission, String drive, String bodyType, Integer doors, Integer seats, Integer price) {
        Connection conn = null;
        PreparedStatement preparedSelect = null;
        ResultSet rs = null;
        try {
            conn = DriverManager.getConnection(DB_URL, USER, PASS);

            // first check whether such a car is already stored
            preparedSelect = conn.prepareStatement(SELECT_SQL);
            preparedSelect.setString(1, brand);
            preparedSelect.setString(2, model);
            preparedSelect.setString(3, version);
            preparedSelect.setInt(4, enginePower);
            preparedSelect.setDouble(5, engineCapacity);
            preparedSelect.setString(6, fuel);
            preparedSelect.setString(7, transmission);
            preparedSelect.setString(8, drive);
            preparedSelect.setString(9, bodyType);
            preparedSelect.setInt(10, doors);
            preparedSelect.setInt(11, seats);

            rs = preparedSelect.executeQuery();
            if (rs.next()) {
                updateExisting(conn, rs, brand, price);
            } else {
                insertNew(conn, brand, model, version, year, enginePower, engineCapacity, fuel, transmission, drive, bodyType, doors, seats, price);
            }
        } catch (SQLException se) {
            se.printStackTrace();
        } finally {
            // Release in reverse order of acquisition, regardless of which branch ran.
            close(rs);
            close(preparedSelect);
            close(conn);
        }
    }

    /**
     * Folds a new offer price into the statistics of the row the result set is
     * positioned on. Update failures are reported on the console.
     */
    private static void updateExisting(Connection conn, ResultSet rs, String brand, Integer price) {
        PreparedStatement updatePrepare = null;
        try {
            long repeatedId = rs.getLong("id");
            // The range only ever widens: a price inside the current range changes neither bound.
            int minPrice = Math.min(rs.getInt("price_from"), price);
            int maxPrice = Math.max(rs.getInt("price_to"), price);
            long priceSum = rs.getLong("price_sum") + price;
            long counter = rs.getLong("counter") + 1;
            double avarage = priceSum / (double) counter;

            updatePrepare = conn.prepareStatement(UPDATE_SQL);
            updatePrepare.setInt(1, minPrice);
            updatePrepare.setInt(2, maxPrice);
            updatePrepare.setLong(3, priceSum);
            updatePrepare.setDouble(4, avarage);
            updatePrepare.setLong(5, counter);
            updatePrepare.setLong(6, repeatedId);
            updatePrepare.executeUpdate();

            System.out.println("Zauktualizowano: " + brand);
        } catch (SQLException e) {
            System.out.println("Błąd podczas aktualizacji ogłoszenia: " + e.getMessage());
        } finally {
            close(updatePrepare);
        }
    }

    /**
     * Inserts the first row for a car configuration; the offer price initialises the
     * minimum, maximum, sum and average, and the counter starts at 1.
     */
    private static void insertNew(Connection conn, String brand, String model, String version, Integer year, Integer enginePower, Double engineCapacity, String fuel, String transmission, String drive, String bodyType, Integer doors, Integer seats, Integer price) throws SQLException {
        PreparedStatement preparedStatement = null;
        try {
            preparedStatement = conn.prepareStatement(INSERT_SQL);
            preparedStatement.setString(1, brand);
            preparedStatement.setString(2, model);
            preparedStatement.setString(3, version);
            preparedStatement.setInt(4, year);
            preparedStatement.setInt(5, enginePower);
            preparedStatement.setDouble(6, engineCapacity);
            preparedStatement.setString(7, fuel);
            preparedStatement.setString(8, transmission);
            preparedStatement.setString(9, drive);
            preparedStatement.setString(10, bodyType);
            preparedStatement.setInt(11, doors);
            preparedStatement.setInt(12, seats);
            preparedStatement.setInt(13, price);
            preparedStatement.setInt(14, price);
            preparedStatement.setInt(15, price);
            preparedStatement.setInt(16, 1);
            preparedStatement.setDouble(17, price);
            preparedStatement.executeUpdate();

            System.out.println("Dodano: " + brand);
        } finally {
            close(preparedStatement);
        }
    }

    /** Closes a result set, ignoring {@code null} and close failures. */
    private static void close(ResultSet resultSet) {
        if (resultSet != null) {
            try {
                resultSet.close();
            } catch (SQLException ignored) {
                // Nothing useful can be done if releasing the resource fails.
            }
        }
    }

    /** Closes a statement, ignoring {@code null} and close failures. */
    private static void close(Statement statement) {
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException ignored) {
                // Nothing useful can be done if releasing the resource fails.
            }
        }
    }

    /** Closes a connection, ignoring {@code null} and close failures. */
    private static void close(Connection connection) {
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException ignored) {
                // Nothing useful can be done if releasing the resource fails.
            }
        }
    }
}
|
Loading…
Reference in New Issue
Block a user