From b4a94e3dad34f985fdcd6cd1cd561d97da5c2f0f Mon Sep 17 00:00:00 2001 From: DmitriyMX Date: Sun, 2 Apr 2017 03:26:30 +0300 Subject: [PATCH] =?UTF-8?q?=D0=9D=D0=BE=D0=B2=D1=8B=D0=B9=20=D0=BA=D0=B8?= =?UTF-8?q?=D0=BD=D0=BE=D1=82=D0=B5=D0=B0=D1=82=D1=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../kinosearch3/browser/ApacheBrowser.java | 24 +++++++- .../kinosearch3/browser/Browser.java | 1 + .../cinema/onlinelife/ScannerImpl.java | 5 ++ .../cinema/seasonvar/ScannerImpl.java | 61 +++++++++++++++++++ .../kinosearch3/spider/ScannerCinema.java | 1 + .../kinosearch/kinosearch3/spider/Spider.java | 4 +- .../kinosearch/kinosearch3/spider/spring.xml | 7 +++ 7 files changed, 101 insertions(+), 2 deletions(-) create mode 100644 src/main/java/kinosearch/kinosearch3/cinema/seasonvar/ScannerImpl.java diff --git a/src/main/java/kinosearch/kinosearch3/browser/ApacheBrowser.java b/src/main/java/kinosearch/kinosearch3/browser/ApacheBrowser.java index 9fdafab..40d313a 100644 --- a/src/main/java/kinosearch/kinosearch3/browser/ApacheBrowser.java +++ b/src/main/java/kinosearch/kinosearch3/browser/ApacheBrowser.java @@ -5,9 +5,12 @@ package kinosearch.kinosearch3.browser; import org.apache.commons.io.IOUtils; +import org.apache.http.HttpMessage; import org.apache.http.HttpResponse; import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.HttpClients; import java.io.IOException; @@ -37,7 +40,26 @@ public class ApacheBrowser implements Browser { return result; } - private void setup_headers(HttpGet request) { + @Override + public String post(String url, String data) { + HttpClient client = HttpClients.createDefault(); + HttpPost request = new HttpPost(url); + setup_headers(request); + request.addHeader("Content-Type", "application/x-www-form-urlencoded"); + + String result = ""; + try { + request.setEntity(new StringEntity(data)); + HttpResponse response = client.execute(request); + result = IOUtils.toString(response.getEntity().getContent(), encoding); + } catch (IOException ignore) { + // ignore + } + + return result; + } + + private void setup_headers(HttpMessage request) { request.addHeader("Connection", "close"); request.addHeader("Accept-Encoding", "deflate"); request.addHeader("User-Agent", "Mozilla/5.0 (Linux; Android 4.2.2; GT-I9505 Build/JDQ39) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.59 Mobile Safari/537.36"); diff --git a/src/main/java/kinosearch/kinosearch3/browser/Browser.java b/src/main/java/kinosearch/kinosearch3/browser/Browser.java index 07f57a8..7c777fe 100644 --- a/src/main/java/kinosearch/kinosearch3/browser/Browser.java +++ b/src/main/java/kinosearch/kinosearch3/browser/Browser.java @@ -7,4 +7,5 @@ package kinosearch.kinosearch3.browser; public interface Browser { void setEncoding(String encoding); String get(String url); + String post(String url, String data); } diff --git a/src/main/java/kinosearch/kinosearch3/cinema/onlinelife/ScannerImpl.java b/src/main/java/kinosearch/kinosearch3/cinema/onlinelife/ScannerImpl.java index 9d9c9b2..380d999 100644 --- a/src/main/java/kinosearch/kinosearch3/cinema/onlinelife/ScannerImpl.java +++ b/src/main/java/kinosearch/kinosearch3/cinema/onlinelife/ScannerImpl.java @@ -31,6 +31,11 @@ public class ScannerImpl implements ScannerCinema { } } + @Override + public String getName() { + return "OnlineLife"; + } + @Override public void run() { int lastPage = getLastPage(); diff --git a/src/main/java/kinosearch/kinosearch3/cinema/seasonvar/ScannerImpl.java b/src/main/java/kinosearch/kinosearch3/cinema/seasonvar/ScannerImpl.java new file mode 100644 index 0000000..fcfb951 --- /dev/null +++ b/src/main/java/kinosearch/kinosearch3/cinema/seasonvar/ScannerImpl.java @@ -0,0 +1,61 @@ +/* + * DmitriyMX + * 2017-04-02 + */ +package kinosearch.kinosearch3.cinema.seasonvar; + +import kinosearch.kinosearch3.browser.Browser; +import kinosearch.kinosearch3.spider.FileDownloader; +import kinosearch.kinosearch3.spider.ScannerCinema; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; + +public class ScannerImpl implements ScannerCinema { + private Logger logger = LoggerFactory.getLogger(ScannerImpl.class); + private final FileDownloader fileDownloader; + private final Browser browser; + private final File saveTo; + private int i = 1; + + public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveTo) { + this.fileDownloader = fileDownloader; + this.browser = browser; + this.saveTo = saveTo; + if (!this.saveTo.mkdirs() && !this.saveTo.exists()) { + throw new IllegalStateException(String.format("dir not found: '%s'", this.saveTo.getAbsolutePath())); + } + } + + @Override + public String getName() { + return "Seasonvar"; + } + + @Override + public void run() { + String html = browser.get("http://seasonvar.ru/"); + Document document = Jsoup.parse(html); + Elements elements = document.getElementsByClass("betterT"); + + elements.stream() + .filter(element -> element.tagName().equals("div")) + .map(element -> element.children().get(0)) + .forEach(element -> { + logger.info("element #{}", i++); + String str = element.attr("href"); + str = str.substring(1, str.lastIndexOf(".")); + + String str2 = element.attr("data"); + str2 = browser.get("http://seasonvar.ru"+str2); + Document doc = Jsoup.parse(str2); + str2 = doc.getElementsByTag("img").get(0).attr("src"); + + this.fileDownloader.addFile(str2, new File(this.saveTo, str+".jpg")); + }); + } +} diff --git a/src/main/java/kinosearch/kinosearch3/spider/ScannerCinema.java b/src/main/java/kinosearch/kinosearch3/spider/ScannerCinema.java index dcaa20e..6dc53f1 100644 --- a/src/main/java/kinosearch/kinosearch3/spider/ScannerCinema.java +++ b/src/main/java/kinosearch/kinosearch3/spider/ScannerCinema.java @@ -5,5 +5,6 @@ package kinosearch.kinosearch3.spider; public interface ScannerCinema { + String getName(); void run(); } diff --git a/src/main/java/kinosearch/kinosearch3/spider/Spider.java b/src/main/java/kinosearch/kinosearch3/spider/Spider.java index 618280f..df9c4d6 100644 --- a/src/main/java/kinosearch/kinosearch3/spider/Spider.java +++ b/src/main/java/kinosearch/kinosearch3/spider/Spider.java @@ -18,6 +18,8 @@ public class Spider { void start() { fileDownloader.start(); - scanners.forEach(ScannerCinema::run); + for (ScannerCinema scanner : scanners) { + (new Thread(scanner::run, "Scanner " + scanner.getName())).start(); + } } } diff --git a/src/main/resources/kinosearch/kinosearch3/spider/spring.xml b/src/main/resources/kinosearch/kinosearch3/spider/spring.xml index ee29be4..dd52712 100644 --- a/src/main/resources/kinosearch/kinosearch3/spider/spring.xml +++ b/src/main/resources/kinosearch/kinosearch3/spider/spring.xml @@ -23,10 +23,17 @@ + + + + + + +