From f502665234c776b3f3ba437693bd8ed6c0060cc1 Mon Sep 17 00:00:00 2001 From: DmitriyMX Date: Fri, 7 Apr 2017 18:11:50 +0300 Subject: [PATCH] =?UTF-8?q?=D0=9F=D0=BE=D0=B4=D0=B3=D0=BE=D1=82=D0=BE?= =?UTF-8?q?=D0=B2=D0=BA=D0=B0=20Spider-=D0=B0=20=D0=BA=20=D1=80=D0=B5?= =?UTF-8?q?=D0=BB=D0=B8=D0=B7=D1=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../kinosearch3/cinema/OnlinelifeScanner.java | 8 +++++++- .../kinosearch3/cinema/SeasonvarScanner.java | 12 +++++++++--- .../kinosearch/kinosearch3/spider/Spider.java | 18 +++++++++++++++++- src/main/resources/application.properties | 3 ++- .../kinosearch/kinosearch3/spider/spring.xml | 4 ++-- src/main/resources/log4j2.xml | 13 +++++++++++++ src/test/resources/log4j2.xml | 2 +- 7 files changed, 51 insertions(+), 9 deletions(-) create mode 100644 src/main/resources/log4j2.xml diff --git a/src/main/java/kinosearch/kinosearch3/cinema/OnlinelifeScanner.java b/src/main/java/kinosearch/kinosearch3/cinema/OnlinelifeScanner.java index a5c435c..df49e25 100644 --- a/src/main/java/kinosearch/kinosearch3/cinema/OnlinelifeScanner.java +++ b/src/main/java/kinosearch/kinosearch3/cinema/OnlinelifeScanner.java @@ -13,10 +13,13 @@ import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.File; public class OnlinelifeScanner implements ScannerCinema { + private final Logger logger = LoggerFactory.getLogger(OnlinelifeScanner.class); private FileDownloader fileDownloader; private Browser browser; private File saveToDir; @@ -47,7 +50,10 @@ public class OnlinelifeScanner implements ScannerCinema { .flatMap(element -> element.children().stream()) .filter(element -> element.tagName().equals("a")) .map(element -> element.attr("href")) - .forEach(this::browseAndSave); + .forEach(url -> { + logger.info(url); + this.browseAndSave(url); + }); } } diff --git a/src/main/java/kinosearch/kinosearch3/cinema/SeasonvarScanner.java b/src/main/java/kinosearch/kinosearch3/cinema/SeasonvarScanner.java index 74e2bfe..cf94869 100644 --- a/src/main/java/kinosearch/kinosearch3/cinema/SeasonvarScanner.java +++ b/src/main/java/kinosearch/kinosearch3/cinema/SeasonvarScanner.java @@ -13,11 +13,14 @@ import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.File; public class SeasonvarScanner implements ScannerCinema { - private static final String DOMAIN = "http://seasonvar.ru/"; + private static final String DOMAIN = "http://seasonvar.ru"; + private final Logger logger = LoggerFactory.getLogger(SeasonvarScanner.class); private final FileDownloader fileDownloader; private final Browser browser; private final File saveTo; @@ -44,7 +47,10 @@ public class SeasonvarScanner implements ScannerCinema { elements.stream() .flatMap(element -> element.children().stream()) .map(element -> element.attr("href")) - .forEach(url -> this.browseAndSave(DOMAIN + url)); + .forEach(url -> { + logger.info(DOMAIN + url); + this.browseAndSave(DOMAIN + url); + }); } @Override @@ -52,7 +58,7 @@ public class SeasonvarScanner implements ScannerCinema { Document document = Jsoup.parse(browser.get(url)); Element element = document.getElementsByAttributeValue("itemprop", "thumbnailUrl").get(0); - File saveTo = new File(this.saveTo, url.substring(url.indexOf("/")+1, url.lastIndexOf("."))+".jpg"); + File saveTo = new File(this.saveTo, url.substring(url.lastIndexOf("/")+1, url.lastIndexOf("."))+".jpg"); this.fileDownloader.addFile(element.attr("src"), saveTo); CinemaDocument cinemaDocument = new CinemaDocument(); diff --git a/src/main/java/kinosearch/kinosearch3/spider/Spider.java b/src/main/java/kinosearch/kinosearch3/spider/Spider.java index df9c4d6..5809ee5 100644 --- a/src/main/java/kinosearch/kinosearch3/spider/Spider.java +++ b/src/main/java/kinosearch/kinosearch3/spider/Spider.java @@ -18,8 +18,24 @@ public class Spider { void start() { fileDownloader.start(); + + ThreadGroup threadGroup = new ThreadGroup("Scanners"); for (ScannerCinema scanner : scanners) { - (new Thread(scanner::run, "Scanner " + scanner.getName())).start(); + (new Thread(threadGroup, scanner::run, "Scanner " + scanner.getName())).start(); + } + + while (threadGroup.activeCount() > 0) { + sleep(); + } + + fileDownloader.stop(); + } + + private void sleep() { + try { + Thread.sleep(1000L); + } catch (InterruptedException e) { + // ignore } } } diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 96f8981..3b1b093 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -1,3 +1,4 @@ mongo.host=127.0.0.1 mongo.port=27017 -mongo.db=kinosearch \ No newline at end of file +mongo.db=kinosearch +saveRootDir=R:\\ \ No newline at end of file diff --git a/src/main/resources/kinosearch/kinosearch3/spider/spring.xml b/src/main/resources/kinosearch/kinosearch3/spider/spring.xml index 8f66353..3543420 100644 --- a/src/main/resources/kinosearch/kinosearch3/spider/spring.xml +++ b/src/main/resources/kinosearch/kinosearch3/spider/spring.xml @@ -43,14 +43,14 @@ - + - + diff --git a/src/main/resources/log4j2.xml b/src/main/resources/log4j2.xml new file mode 100644 index 0000000..879b038 --- /dev/null +++ b/src/main/resources/log4j2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/test/resources/log4j2.xml b/src/test/resources/log4j2.xml index 879b038..422b6e7 100644 --- a/src/test/resources/log4j2.xml +++ b/src/test/resources/log4j2.xml @@ -6,7 +6,7 @@ - +