get scores by product_id including pagination

This commit is contained in:
PawelJa 2019-05-27 18:55:22 +01:00
parent be14bf06c3
commit 6eed415262
2 changed files with 90 additions and 48 deletions

View File

@ -4,9 +4,13 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.sql.SQLOutput;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
public class Main {
@ -19,19 +23,39 @@ public class Main {
getScoresFromPage(doc);
System.out.println("\n\n");
if (doc.selectFirst("div.pagination") != null && doc.selectFirst("div.pagination").selectFirst("li.arrow-next") != null) {
getScoresFromNextPage(Jsoup.connect("https://www.ceneo.pl/" + doc.selectFirst("div.pagination").selectFirst("li.arrow-next").selectFirst("a").attr("href")).get());
}
} catch (IOException e) {
}
return null;
}
private static Object getTextFromElement(Element element, String cssQuery) {
try {
/**
 * Collects scores from {@code document} and then from every following page
 * reachable through the pagination "next" arrow.
 *
 * <p>Pages are walked in a loop rather than by recursion, so the call stack
 * does not grow with the number of pages. Walking stops when a page has no
 * {@code div.pagination}, no {@code li.arrow-next} inside it, or no link
 * inside the arrow, and also when fetching the next page fails.
 *
 * @param document the first page to process
 */
private static void getScoresFromNextPage(Document document) {
    Document currentPage = document;
    while (true) {
        getScoresFromPage(currentPage);

        // Resolve each step of the chain once; selectFirst returns null when nothing matches.
        Element pagination = currentPage.selectFirst("div.pagination");
        Element nextArrow = pagination == null ? null : pagination.selectFirst("li.arrow-next");
        Element nextLink = nextArrow == null ? null : nextArrow.selectFirst("a");
        if (nextLink == null) {
            return;
        }

        try {
            currentPage = Jsoup.connect("https://www.ceneo.pl/" + nextLink.attr("href")).get();
        } catch (IOException e) {
            // Best effort: keep what was collected so far and stop paginating.
            e.printStackTrace();
            return;
        }
    }
}
private static String getTextFromElement(Element element, String cssQuery) {
if (element.selectFirst(cssQuery) != null) {
return element.selectFirst(cssQuery).text();
} catch (NullPointerException e) {
return 0;
}
return null;
}
/**
 * Counts the {@code li} elements found under the matches of {@code cssQuery}
 * inside {@code element}.
 *
 * @param element  the element to search in
 * @param cssQuery selector of the container whose list items are counted
 * @return the count as a decimal string, or {@code null} when nothing in
 *         {@code element} matches {@code cssQuery}
 */
private static String getSizeOfElementsFromElement(Element element, String cssQuery) {
    // No matching container at all: report "absent" rather than "0".
    if (element.selectFirst(cssQuery) == null) {
        return null;
    }
    int itemCount = element.select(cssQuery).select("li").size();
    return String.valueOf(itemCount);
}
@ -39,47 +63,50 @@ public class Main {
Elements products = document.select("li.review-box");
for (Element product : products) {
System.out.println(getTextFromElement(product, "span.review-score-count"));
// System.out.println(product);
// Element score = product.selectFirst("span.review-score-count");
// try {
// System.out.println("score: " + score.text());
// } catch (NullPointerException e) {
// }
//
// Element description = product.selectFirst("p.product-review-body");
// try {
// System.out.println("description: \n" + description.text());
// } catch (NullPointerException e) {
// }
//
// try {
// Element advElement = product.selectFirst("div.pros-cell");
// Elements advCounter = advElement.select("li");
// System.out.println("adv: " + advCounter.size());
// } catch (NullPointerException e) {
// System.out.println("adv: 0");
// }
//
// try {
// Element dissadvElement = product.selectFirst("div.cons-cell");
// Elements dissadvCounter = dissadvElement.select("li");
// System.out.println("adv: " + dissadvCounter.size());
// } catch (NullPointerException e) {
// System.out.println("dissadv: 0");
// }
scoreList.add(new Score(
getTextFromElement(product, "span.review-score-count"),
getTextFromElement(product, "p.product-review-body"),
getSizeOfElementsFromElement(product, "div.pros-cell"),
getSizeOfElementsFromElement(product, "div.cons-cell")
));
}
}
/**
 * Replaces {@code scoreList} with a new list holding only the entries whose
 * score, description, advantages counter and disadvantages counter are all
 * non-null.
 */
private static void removeObjectsContainedNull() {
    scoreList = scoreList.stream()
            // Keep an entry only if none of its four fields is missing.
            .filter(entry -> !(entry.getScore() == null
                    || entry.getDescription() == null
                    || entry.getAdvantagesCounter() == null
                    || entry.getDissadvantagesCounter() == null))
            .collect(Collectors.toList());
}
/**
 * Writes every entry of {@code scoreList} to the file {@code scores.txt},
 * using each entry's {@code toString()} as its serialized form.
 *
 * <p>If the file cannot be opened, the stack trace is printed and nothing is
 * written. The writer is always closed, even if writing fails part-way.
 */
private static void saveListToFile() {
    try (PrintWriter printWriter = new PrintWriter(new FileWriter("scores.txt"))) {
        for (Score score : scoreList) {
            // print, not printf: the text is scraped data and may contain '%',
            // which printf would try to interpret as a format specifier.
            printWriter.print(score.toString());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Program entry point: calls getScoresByItemId for each product id in
 * productList, drops list entries that contain a null field, prints the
 * number of remaining entries and saves them to a file.
 *
 * @param args command-line arguments (not read here)
 * @throws IOException declared by the signature
 */
public static void main(String[] args) throws IOException {
// NOTE(review): productList is declared twice in this view; presumably the first
// line is the removed side of the diff and the second the added one - confirm.
int[] productList = new int[]{29362313, 44279952};
int[] productList = new int[]{70021762, 26740026};
for (Integer item : productList) {
getScoresByItemId(item);
}
// Filter out incomplete entries before reporting and saving.
removeObjectsContainedNull();
System.out.println(scoreList.size());
saveListToFile();
}
}

View File

@ -1,16 +1,16 @@
package entities;
public class Score {
private float score;
private String score;
private String description;
private int advantagesCounter;
private int dissadvantagesCounter;
private String advantagesCounter;
private String dissadvantagesCounter;
public float getScore() {
public String getScore() {
return score;
}
public void setScore(float score) {
public void setScore(String score) {
this.score = score;
}
@ -22,19 +22,34 @@ public class Score {
this.description = description;
}
public int getAdvantagesCounter() {
public String getAdvantagesCounter() {
return advantagesCounter;
}
public void setAdvantagesCounter(int advantagesCounter) {
public void setAdvantagesCounter(String advantagesCounter) {
this.advantagesCounter = advantagesCounter;
}
public int getDissadvantagesCounter() {
public String getDissadvantagesCounter() {
return dissadvantagesCounter;
}
public void setDissadvantagesCounter(int dissadvantagesCounter) {
public void setDissadvantagesCounter(String dissadvantagesCounter) {
this.dissadvantagesCounter = dissadvantagesCounter;
}
/**
 * Creates a score entry from already-extracted text values.
 *
 * @param score                 the score text
 * @param description           the description text
 * @param advantagesCounter     the advantages count, as a string
 * @param dissadvantagesCounter the disadvantages count, as a string
 */
public Score(String score, String description, String advantagesCounter, String dissadvantagesCounter) {
    // Plain field initialisation; no validation or conversion is performed.
    this.dissadvantagesCounter = dissadvantagesCounter;
    this.advantagesCounter = advantagesCounter;
    this.description = description;
    this.score = score;
}
/**
 * Serializes this entry as one line: score, description, advantages counter
 * and disadvantages counter, separated by {@code ;;;;;} and terminated by a
 * newline.
 *
 * @return the single-line text form of this entry, including the trailing newline
 */
@Override
public String toString() {
    final String separator = ";;;;;";
    StringBuilder line = new StringBuilder();
    line.append(score);
    // NOTE(review): a single quote is emitted right after the description;
    // looks like a leftover from a generated toString - confirm before changing the format.
    line.append(separator).append(description).append('\'');
    line.append(separator).append(advantagesCounter);
    line.append(separator).append(dissadvantagesCounter);
    line.append("\n");
    return line.toString();
}
}