Merge remote-tracking branch 'origin/master'

This commit is contained in:
s452111 2020-01-19 16:24:58 +01:00
commit 649ec68b3f
44 changed files with 423 additions and 18830 deletions

View File

@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the Car4You web scraper module. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- NOTE(review): "groupId" looks like an IDE placeholder; confirm the intended group coordinates. -->
<groupId>groupId</groupId>
<artifactId>Car4You_webscrapper</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<dependency>
<!-- jsoup HTML parser library @ https://jsoup.org/ -->
<!-- Used by the crawler to download and query the listing pages. -->
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.12.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/postgresql/postgresql -->
<!-- PostgreSQL JDBC driver used by the database layer. -->
<!-- NOTE(review): these are the legacy "postgresql" coordinates; presumably newer driver
     releases live under the org.postgresql group. Confirm before upgrading. -->
<dependency>
<groupId>postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>9.1-901-1.jdbc4</version>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,122 @@
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.Hashtable;
/**
 * Worker thread that scrapes passenger-car offers from otomoto.pl and stores them
 * through {@code dbService.insert}.
 *
 * <p>The thread walks result pages 1..{@value #LAST_PAGE} of a fixed search URL, follows
 * every offer link that is not recognised as promoted, reads the price and the parameter
 * list from the offer page and hands the parsed values to the database layer.
 *
 * <p>Failure handling: a download is retried a bounded number of times with a pause
 * between attempts; a page or offer that stays unreachable, or an offer that cannot be
 * parsed, is skipped so that a single bad document never stops the crawl. Interrupting the
 * thread ends the crawl at the next page/offer boundary.
 */
public class CrawlerThread extends Thread {

    /** Search-results URL; the 1-based page number is appended to form the page link. */
    private static final String PAGE_URL = "https://www.otomoto.pl/osobowe/?search%5Bfilter_enum_has_registration%5D=1&search%5Bfilter_enum_damaged%5D=0&search%5Bfilter_enum_registered%5D=1&search%5Bfilter_enum_no_accident%5D=1&search%5Border%5D=filter_float_price%3Aasc&search%5Bbrand_program_id%5D%5B0%5D=&search%5Bcountry%5D=&page=";

    /** Number of the last result page that is visited. */
    private static final int LAST_PAGE = 500;

    /** How many times a single URL is requested before it is given up on. */
    private static final int MAX_FETCH_ATTEMPTS = 5;

    /** Pause between two download attempts of the same URL, in milliseconds. */
    private static final long RETRY_DELAY_MS = 2000L;

    /** Value stored for optional textual parameters that the offer does not provide. */
    private static final String UNSPECIFIED = "nieokreślono";

    public CrawlerThread() {
    }

    /**
     * Crawls all result pages and processes every non-promoted offer found on them.
     */
    @Override
    public void run() {
        for (int page = 1; page <= LAST_PAGE && !isInterrupted(); page++) {
            System.out.println("Strona nr: " + page);
            String targetUrl = PAGE_URL + page;
            System.out.println("Link: " + targetUrl);

            Document resultPage = fetch(targetUrl);
            if (resultPage == null) {
                // Unreachable after all attempts (or interrupted): move on to the next page.
                continue;
            }

            Elements links = resultPage.select("a.offer-title__link");
            for (Element link : links) {
                if (isInterrupted()) {
                    return;
                }
                if (isPromoted(link)) {
                    System.out.println("Oferta promowana. Pomijam. " + link.text());
                    continue;
                }
                try {
                    processOffer(link.attr("href"));
                } catch (RuntimeException e) {
                    // One malformed offer must not terminate the whole crawl.
                    System.out.println("Błąd pobierania danych!: " + e.getMessage());
                }
            }
        }
    }

    /**
     * Downloads and parses the document behind {@code url}.
     *
     * @param url address to download
     * @return the parsed document, or {@code null} when every attempt failed or the thread
     *         was interrupted while waiting for the next attempt
     */
    private Document fetch(String url) {
        for (int attempt = 1; attempt <= MAX_FETCH_ATTEMPTS; attempt++) {
            try {
                return Jsoup.connect(url).get();
            } catch (IOException e) {
                System.out.println("Nie udalo sie polaczyc. " + e.getMessage());
            }
            if (attempt < MAX_FETCH_ATTEMPTS) {
                try {
                    // Back off instead of hammering the server in a tight loop.
                    Thread.sleep(RETRY_DELAY_MS);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    return null;
                }
            }
        }
        return null;
    }

    /**
     * Heuristic check whether an offer link belongs to a promoted listing: the first
     * {@code div} found from the link's fourth ancestor contains a {@code span}.
     * A link without such an ancestor or {@code div} is treated as not promoted.
     *
     * <p>TODO: replace with a reliable promoted-offer check.
     *
     * @param link offer title link taken from a result page
     * @return {@code true} when the listing looks promoted
     */
    private static boolean isPromoted(Element link) {
        Element container = link;
        for (int level = 0; level < 4 && container != null; level++) {
            container = container.parent();
        }
        if (container == null) {
            return false;
        }
        Element firstDiv = container.selectFirst("div");
        return firstDiv != null && firstDiv.selectFirst("span") != null;
    }

    /**
     * Downloads a single offer page, extracts its price and parameters and stores the car.
     * An unreachable offer page is skipped silently (the failed attempts are already logged).
     *
     * @param url address of the offer page
     * @throws RuntimeException when the price or a required numeric parameter is missing
     *         or malformed
     */
    private void processOffer(String url) {
        System.out.println("Pobieram: " + url);
        Document offerPage = fetch(url);
        if (offerPage == null) {
            return;
        }

        Element priceElement = offerPage.select("span.offer-price__number").first();
        if (priceElement == null) {
            throw new IllegalStateException("missing price element");
        }
        Integer price = parsePrice(priceElement.text());

        // Collect the "label -> value" pairs of the offer parameter list.
        Hashtable<String, String> params = new Hashtable<String, String>();
        Elements items = offerPage.select("ul.offer-params__list").select("li.offer-params__item");
        for (Element item : items) {
            String label = item.select("span.offer-params__label").text();
            String value = item.select("div.offer-params__value").text();
            params.put(label, value);
        }

        String brand = params.get("Marka pojazdu");
        String model = params.get("Model pojazdu");
        String version = params.get("Wersja");
        String fuel = params.get("Rodzaj paliwa");
        String transmission = params.get("Skrzynia biegów");
        String drive = params.get("Napęd");
        String bodyType = params.get("Typ");

        // The production year is optional: a missing value is stored as 0.
        String yearText = params.get("Rok produkcji");
        Integer year = (yearText == null) ? Integer.valueOf(0) : Integer.valueOf(yearText);

        // Only the leading number of the power value is used; the rest is dropped.
        Integer enginePower = Integer.valueOf(requiredParam(params, "Moc").split(" ")[0]);

        // Remove the spaces, then cut the trailing three characters
        // (presumably the "cm3" unit suffix; confirm against live pages).
        String capacityText = requiredParam(params, "Pojemność skokowa").replaceAll(" ", "");
        Double engineCapacity = Double.valueOf(capacityText.substring(0, capacityText.length() - 3));

        Integer doors = Integer.valueOf(requiredParam(params, "Liczba drzwi"));
        Integer seats = Integer.valueOf(requiredParam(params, "Liczba miejsc"));

        if (version == null) {
            version = UNSPECIFIED;
        }
        if (drive == null) {
            drive = UNSPECIFIED;
        }
        if (transmission == null) {
            transmission = UNSPECIFIED;
        }

        dbService.insert(brand, model, version, year, enginePower, engineCapacity, fuel,
                transmission, drive, bodyType, doors, seats, price);
    }

    /**
     * Extracts the whole-unit amount from a price label such as {@code "12 345 PLN"} or
     * {@code "12 345,50 PLN"}: everything from the first comma on is ignored and all
     * non-digit characters (spaces, currency code) are removed.
     *
     * @param priceText text of the price element
     * @return the price in whole currency units
     * @throws NumberFormatException when the text contains no digits before the comma
     */
    static int parsePrice(String priceText) {
        int comma = priceText.indexOf(',');
        String wholePart = (comma >= 0) ? priceText.substring(0, comma) : priceText;
        return Integer.parseInt(wholePart.replaceAll("\\D", ""));
    }

    /**
     * Returns the value of a parameter that must be present on the offer page.
     *
     * @param params parsed offer parameters
     * @param label  parameter label as shown on the page
     * @return the parameter value, never {@code null}
     * @throws IllegalStateException when the offer has no such parameter
     */
    private static String requiredParam(Hashtable<String, String> params, String label) {
        String value = params.get(label);
        if (value == null) {
            throw new IllegalStateException("missing parameter: " + label);
        }
        return value;
    }
}

View File

@ -0,0 +1,21 @@
import java.util.ArrayList;
/**
 * Application entry point: creates the crawler threads and starts them.
 */
public class ThreadManager {

    /** Number of crawler threads that are created and started. */
    private static final int CRAWLER_COUNT = 1;

    /**
     * Creates {@value #CRAWLER_COUNT} crawler thread(s) and starts each of them.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        // Diamond operator instead of the raw type avoids an unchecked-conversion warning.
        ArrayList<CrawlerThread> threads = new ArrayList<>();

        // create the threads
        for (int i = 0; i < CRAWLER_COUNT; i++) {
            threads.add(new CrawlerThread());
        }

        // start the threads
        for (CrawlerThread thread : threads) {
            thread.start();
        }
    }
}

View File

@ -0,0 +1,115 @@
import java.sql.*;
/**
 * Persists scraped car offers in the {@code car} table.
 *
 * <p>Each distinct car configuration is stored once; further offers for the same
 * configuration only update its price statistics (minimum, maximum, sum, count, average).
 */
public class dbService {
    // JDBC driver name and database URL
    // The PostgreSQL driver class is org.postgresql.Driver (the previous value named a
    // class that does not exist). JDBC 4 drivers are discovered automatically, so this
    // constant is only needed for an explicit Class.forName call.
    static final String JDBC_DRIVER = "org.postgresql.Driver";
    static final String DB_URL = "";
    // Database credentials
    static final String USER = "";
    static final String PASS = "";

    /** Finds an already stored car with the same configuration (the year is not compared). */
    private static final String SELECT_SQL = "" +
            "SELECT * " +
            "FROM car " +
            "WHERE " +
            "brand = ? AND " +
            "model = ? AND " +
            "version = ? AND " +
            "engine_power = ? AND " +
            "engine_capacity = ? AND " +
            "fuel = ? AND " +
            "transmission = ? AND " +
            "drive = ? AND " +
            "body_type = ? AND " +
            "doors = ? AND " +
            "seats = ?";

    /** Updates the five price-statistics columns of an existing row. */
    private static final String UPDATE_SQL = "" +
            "UPDATE car " +
            "SET price_from = ?, price_to = ?, price_sum = ?, avarage = ?, counter = ? WHERE id = ?";

    /** Inserts a new car together with its initial price statistics. */
    private static final String INSERT_SQL =
            "INSERT INTO car (brand, model, version, year, engine_power, engine_capacity, fuel, transmission, drive, body_type, doors, seats, price_from, price_to, price_sum, counter, avarage) " +
            "values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";

    /**
     * Stores one offer: updates the price statistics of the matching car when it already
     * exists, otherwise inserts a new row whose statistics are initialised from
     * {@code price}.
     *
     * <p>The connection, statements and result set are closed on every path. SQL errors
     * are reported on the console and not propagated.
     *
     * @param brand          car brand
     * @param model          car model
     * @param version        model version
     * @param year           production year (stored on insert only, not used for matching)
     * @param enginePower    engine power; must not be {@code null}
     * @param engineCapacity engine capacity; must not be {@code null}
     * @param fuel           fuel type
     * @param transmission   gearbox type
     * @param drive          drive type
     * @param bodyType       body type
     * @param doors          number of doors; must not be {@code null}
     * @param seats          number of seats; must not be {@code null}
     * @param price          offer price; must not be {@code null}
     */
    public static void insert(String brand, String model, String version, Integer year, Integer enginePower, Double engineCapacity, String fuel, String transmission, String drive, String bodyType, Integer doors, Integer seats, Integer price) {
        try (Connection conn = DriverManager.getConnection(DB_URL, USER, PASS);
             PreparedStatement select = conn.prepareStatement(SELECT_SQL)) {
            // first check whether this car is already stored
            select.setString(1, brand);
            select.setString(2, model);
            select.setString(3, version);
            select.setInt(4, enginePower);
            select.setDouble(5, engineCapacity);
            select.setString(6, fuel);
            select.setString(7, transmission);
            select.setString(8, drive);
            select.setString(9, bodyType);
            select.setInt(10, doors);
            select.setInt(11, seats);

            try (ResultSet rs = select.executeQuery()) {
                if (rs.next()) {
                    updateExisting(conn, rs, price, brand);
                } else {
                    try (PreparedStatement insert = conn.prepareStatement(INSERT_SQL)) {
                        insert.setString(1, brand);
                        insert.setString(2, model);
                        insert.setString(3, version);
                        insert.setInt(4, year);
                        insert.setInt(5, enginePower);
                        insert.setDouble(6, engineCapacity);
                        insert.setString(7, fuel);
                        insert.setString(8, transmission);
                        insert.setString(9, drive);
                        insert.setString(10, bodyType);
                        insert.setInt(11, doors);
                        insert.setInt(12, seats);
                        // A first offer is its own minimum, maximum, sum and average.
                        insert.setInt(13, price);
                        insert.setInt(14, price);
                        insert.setInt(15, price);
                        insert.setInt(16, 1);
                        insert.setDouble(17, price);
                        insert.executeUpdate();
                    }
                    System.out.println("Dodano: " + brand);
                }
            }
        } catch (SQLException se) {
            se.printStackTrace();
        }
    }

    /**
     * Folds a new offer price into the statistics of the row {@code rs} is positioned on.
     * Update failures are reported on the console and do not propagate.
     */
    private static void updateExisting(Connection conn, ResultSet rs, int price, String brand) {
        try {
            long repeatedId = rs.getLong("id");
            int[] range = updatedPriceRange(rs.getInt("price_from"), rs.getInt("price_to"), price);
            long priceSum = rs.getLong("price_sum") + price;
            long counter = rs.getLong("counter") + 1;
            double average = priceSum / (double) counter;

            // update with the new data; only the five statistics columns change
            try (PreparedStatement update = conn.prepareStatement(UPDATE_SQL)) {
                update.setInt(1, range[0]);
                update.setInt(2, range[1]);
                update.setLong(3, priceSum);
                update.setDouble(4, average);
                update.setLong(5, counter);
                update.setLong(6, repeatedId);
                update.executeUpdate();
            }
            System.out.println("Zauktualizowano: " + brand);
        } catch (SQLException e) {
            System.out.println("Błąd podczas aktualizacji ogłoszenia: " + e.getMessage());
        }
    }

    /**
     * Widens a price range so that it contains {@code price}. A price that already lies
     * inside the range leaves both bounds unchanged.
     *
     * @param currentMin lowest price seen so far
     * @param currentMax highest price seen so far
     * @param price      price of the new offer
     * @return a two-element array {@code {newMin, newMax}}
     */
    static int[] updatedPriceRange(int currentMin, int currentMax, int price) {
        return new int[] {Math.min(currentMin, price), Math.max(currentMax, price)};
    }
}

View File

Before

Width:  |  Height:  |  Size: 201 KiB

After

Width:  |  Height:  |  Size: 201 KiB

View File

Before

Width:  |  Height:  |  Size: 434 KiB

After

Width:  |  Height:  |  Size: 434 KiB

View File

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

View File

Before

Width:  |  Height:  |  Size: 4.9 KiB

After

Width:  |  Height:  |  Size: 4.9 KiB

View File

Before

Width:  |  Height:  |  Size: 576 KiB

After

Width:  |  Height:  |  Size: 576 KiB

View File

@ -38,7 +38,7 @@
<ul class="nav-menu">
<li class="menu-active"><a href="index.html">Strona główna</a></li>
<li><a href="opis.html">O nas</a></li>
<li><a href="ankieta.html">Rekomendacja</a></li>
<li><a href="search.html">Rekomendacja</a></li>
<li><a href="lista.html">Galeria</a></li>
</ul>
</nav><!-- #nav-menu-container -->
@ -63,7 +63,7 @@
<p class="pt-10 pb-10 text-white">
W naszej wyszukiwarce go znajdziesz!
</p>
<a href="ankieta.html" class="primary-btn">Rozpocznij</a>
<a href="search.html" class="primary-btn">Rozpocznij</a>
</div>
</div>
</div>

View File

@ -0,0 +1,78 @@
/**
 * Reads the search form, sends the chosen filters to the recommendation service as a
 * GET query string and renders the returned HTML inside the `.result` element.
 *
 * A filter is only sent when the user actually set it: selects whose value is -1
 * ("any") and numeric inputs that are empty or not greater than zero are left out.
 * Every value is URL-encoded, so labels such as "Benzyna+LPG" reach the server intact.
 */
function sendJSON() {
    const result = document.querySelector('.result');
    const seats = document.getElementsByClassName('seats')[0];
    const url = "http://34.65.132.148:8080";
    const params = [];

    const field = (id) => document.getElementById(id);
    const add = (name, value) => params.push(name + "=" + encodeURIComponent(value));
    const isChosen = (select) => Number(select.value) !== -1;
    const isPositive = (input) => Number(input.value) > 0;

    // Sends the visible label of the selected option (looked up by selectedIndex, not by
    // treating the option value as an index into the option list).
    const addSelectedLabel = (name) => {
        const select = field(name);
        if (isChosen(select) && select.selectedIndex >= 0) {
            add(name, select.options[select.selectedIndex].text);
        }
    };
    // Sends the raw option value of a select.
    const addSelectedValue = (name) => {
        const select = field(name);
        if (isChosen(select)) {
            add(name, select.value);
        }
    };
    // Sends a numeric input when it holds a positive number.
    const addPositive = (name, input) => {
        if (isPositive(input)) {
            add(name, input.value);
        }
    };

    addSelectedLabel("fuel_type");
    addPositive("seats", seats);
    addSelectedLabel("doors");
    addPositive("maximum_price", field("maximum_price"));
    addPositive("max_years_old", field("max_years_old"));
    addPositive("engine_power_from", field("engine_power_from"));
    addPositive("engine_power_to", field("engine_power_to"));
    addSelectedValue("body_type");
    addSelectedValue("gearbox");
    addSelectedValue("drive_type");

    const query = params.length > 0 ? "?" + params.join("&") : "";
    console.log("Debug: " + query);

    const req = new XMLHttpRequest();
    // Register the callback before sending so that no state change can be missed.
    req.onreadystatechange = function () {
        if (req.readyState === 4 && req.status === 200) {
            // NOTE(review): the response is inserted as raw HTML; this is only safe while
            // the service is trusted to return sanitised markup.
            result.innerHTML = req.responseText;
        }
    };
    // No Content-Type header: a GET request has no body, and a non-safelisted content
    // type would force a CORS preflight for this cross-origin call.
    req.open("GET", url + query, true);
    req.send();
}

View File

@ -31,15 +31,14 @@
</head>
<body>
<header id="header">
<div class="container main-menu">
<div class="row align-items-center justify-content-between d-flex ">
<div class="main-menu">
<div class="row align-items-center d-flex">
<a href="index.html"><img src="img/car4you.png" alt="" title="" width="100px" /></a>
<nav id="nav-menu-container">
<ul class="nav-menu">
<li class="menu-active"><a href="index.html">Strona główna</a></li>
<li><a href="opis.html">O nas</a></li>
<li><a href="ankieta.html">Rekomendacja</a></li>
<li><a href="lista.html">Galeria</a></li>
</ul>
</nav><!-- #nav-menu-container -->
</div>
@ -50,87 +49,76 @@
<section class="banner-area relative" id="home">
<div class="text-center">
<div class="overlay overlay-bg"></div>
<div class="container"></div>
<div class="row fullscreen d-flex align-items-center justify-content-between">
<div class="row fullscreen d-flex justify-content-between">
<div class="banner-content col-lg-12 col-md-12">
<div class="col-lg-10 col-md-10 header-right ">
<h4 class="pb-10">Uzupełnij pola</h4>
<form class="form" name="myForm" action="sendingRequest.js" method="post" enctype="application/x-www-form-urlencoded">
<div class="form-group ">
<div class="default-select" id="default-select">
<select name="fuel_type">
<option value="-1" disabled selected hidden>Rodzaj paliwa</option>
<div class="form-group">
Typ paliwa:
<div class="default-select">
<select id="fuel_type">
<option value="-1">Dowolny</option>
<option value="1">Benzyna</option>
<option value="2">LPG</option>
<option value="2">Benzyna+LPG</option>
<option value="3">Diesel</option>
</select>
</div>
</div>
<div class="form-group">
<div class="default-select" id="default-select">
<select name="seats">
<option value="-1" disabled selected hidden>Pojemność (liczba miejsc)</option>
<option value="1">2</option>
<option value="2">4</option>
<option value="3">5</option>
<option value="4">więcej niż 5</option>
</select>
Liczba miejsc:
<div class="default-select">
<input class="seats" id="seats">
</input>
</div>
</div>
<div class="form-group">
<div class="default-select" id="default-select">
<select name="doors">
<option value="-1" disabled selected hidden>Liczba drzwi</option>
<div class="default-select">
Liczba drzwi:
<select id="doors">
<option value="-1">Dowolna</option>
<option value="1">3</option>
<option value="2">4</option>
<option value="3">5</option>
<option value="2">5</option>
<option value="3">większa niż 5</option>
</select>
</div>
</div>
<div class="form-group">
<div class="default-select" id="default-select">
<select name="maximum_price">
<option value="-1" disabled selected hidden>Maksymalna cena</option>
<option value="5000">5 tys. zł</option>
<option value="10000">10 tys. zł </option>
<option value="15000">15 tys. zł</option>
<option value="20000">20 tys. zł</option>
<option value="25000">25 tys. zł</option>
<option value="30000">30 tys. zł</option>
</select>
<div class="default-select">
Budżet:
<input id="maximum_price">
</input>
</div>
</div>
<div class="form-group">
<div class="default-select" id="default-select">
<select name="max_years_old">
<option value="-1" disabled selected hidden>Maksymalna wiek</option>
<option value="1">1 rok</option>
<option value="2">2 lata</option>
<option value="3">3 lata</option>
<option value="6">6 lat</option>
<option value="8">8 lat</option>
<option value="10">więcej niż 10 lat</option>
</select>
<div class="default-select">
Maksymalny wiek pojazdu:
<input id="max_years_old">
</input>
</div>
</div>
<div class="form-group">
<div class="default-select" id="default-select">
<select name="engine_power">
<option value="-1" disabled selected hidden>Minimalna moc silnika</option>
<option value="60">60 KM</option>
<option value="100">100 KM</option>
<option value="150">150 KM</option>
<option value="200">200 KM</option>
<option value="250">250 KM</option>
</select>
<div class="default-select">
Moc od:
<input id="engine_power_from">
</input>
</div>
</div>
<div class="form-group">
<div class="default-select" id="default-select">
<select name="car_type">
<option value="-1" disabled selected hidden>Typ</option>
<div class="default-select">
Moc do:
<input id="engine_power_to">
</input>
</div>
</div>
<div class="form-group">
<div class="default-select">
Typ nadwozia:
<select id="body_type">
<option value="-1">Dowolny</option>
<option value="hetchback">Hatchback</option>
<option value="SUV">SUV</option>
<option value="coupe">Coupe</option>
@ -140,19 +128,21 @@
</div>
<div class="form-group">
<div class="default-select" id="default-select">
<select name="gearbox">
<option value="-1" disabled selected hidden>Skrzynia biegów</option>
<option value="manual">Ręczna</option>
<div class="default-select">
Skrzynia biegów:
<select id="gearbox">
<option value="-1">Dowolna</option>
<option value="manual">Manualna</option>
<option value="automatic">Automatyczna</option>
</select>
</div>
</div>
<div class="form-group">
<div class="default-select" id="default-select">
<select name="drive_type">
<option value="-1" disabled selected hidden>Napęd</option>
<div class="default-select">
Rodzaj napędu:
<select id="drive_type">
<option value="-1">Dowolny</option>
<option value="1">Na przednie koła</option>
<option value="2">Na tylne koła</option>
<option value="3">Na cztery koła</option>
@ -161,7 +151,10 @@
</div>
<div class="form-group">
<input type="submit" value="wyslij" class="primary-btn" onclick="sendJSON()" />
<input value="Szukaj" class="primary-btn" onclick="sendJSON()" />
<div class="result">
<!-- result list-->
</div>
</div>
<div id="wypisywanie"></div>
</form>
@ -199,19 +192,7 @@
<img class="footer-bottom" alt="">
</footer>
<!-- End footer Area -->
<script src="js/vendor/jquery-2.2.4.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.12.9/umd/popper.min.js" integrity="sha384-ApNbgh9B+Y1QKtv3Rn7W3mgPxhU9K/ScQsAP7hUibX39j7fakFPskvXusvfa0b4Q" crossorigin="anonymous"></script>
<script src="js/vendor/bootstrap.min.js"></script>
<script src="https://maps.googleapis.com/maps/api/js?key=AIzaSyBhOdIF3Y9382fqJYt5I_sswSrEw5eihAA"></script>
<script src="js/easing.min.js"></script>
<script src="js/hoverIntent.js"></script>
<script src="js/superfish.min.js"></script>
<script src="js/jquery.ajaxchimp.min.js"></script>
<script src="js/jquery.magnific-popup.min.js"></script>
<script src="js/jquery-ui.js"></script>
<script src="js/jquery.nice-select.min.js"></script>
<script src="js/mail-script.js"></script>
<script src="js/main.js"></script>
<script src="sendingRequest.js"></script>
<script src="js/sendingRequest.js"></script>
</body>
</html>

View File

@ -1,44 +0,0 @@
/**
 * Reads the search-form fields and POSTs their values as a JSON document.
 *
 * The target URL is empty, so the request goes to the address of the current page.
 * The response is not processed.
 */
function sendJSON() {
    let fuel_type = document.querySelector('#fuel_type');
    let seats = document.querySelector('#seats');
    let doors = document.querySelector('#doors');
    let maximum_price = document.querySelector('#maximum_price');
    let max_years_old = document.querySelector('#max_years_old');
    let engine_power = document.querySelector('#engine_power');
    let car_type = document.querySelector('#car_type');
    let gearbox = document.querySelector('#gearbox');
    let drive_type = document.querySelector('#drive_type');
    let req = new XMLHttpRequest();
    let url = "";
    req.open("POST", url, true);
    req.setRequestHeader("Content-Type", "application/json");
    // Response handling is intentionally absent: no state-change callback is registered.
    var data = JSON.stringify({
        fuel_type: fuel_type.value,
        seats: seats.value,
        doors: doors.value,
        maximum_price: maximum_price.value,
        max_years_old: max_years_old.value,
        engine_power: engine_power.value,
        car_type: car_type.value,
        gearbox: gearbox.value,
        // Send the selected value, not the DOM element itself.
        drive_type: drive_type.value
    });
    req.send(data);
}

File diff suppressed because it is too large Load Diff