0

Загрузчик постеров

This commit is contained in:
2017-04-02 02:08:32 +03:00
parent 6e4a69df97
commit 78fba0f79b
10 changed files with 362 additions and 13 deletions

View File

@@ -0,0 +1,68 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
*/
package kinosearch.kinosearch3.cinema.onlinelife;
import kinosearch.kinosearch3.browser.Browser;
import kinosearch.kinosearch3.spider.FileDownloader;
import kinosearch.kinosearch3.spider.ScannerCinema;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
/**
 * Poster scanner for the "zarubezh-filmy" catalogue of online-life.cc.
 *
 * <p>{@link #run()} walks the catalogue pages from the last one down to the first and, for every
 * element with class {@code slider-item}, queues the poster image on the {@link FileDownloader}.
 * Each poster is saved into {@code saveToDir} under the film page's file name (the part of the
 * link between the last {@code /} and the last {@code .}) with a {@code .jpg} extension.
 */
public class ScannerImpl implements ScannerCinema {
    private static final Logger logger = LoggerFactory.getLogger(ScannerImpl.class);

    /** Root of the catalogue; page N lives at {@code CATALOG_URL + "page/N/"}. */
    private static final String CATALOG_URL = "http://www.online-life.cc/filmy/zarubezh-filmy/";

    private final FileDownloader fileDownloader;
    private final Browser browser;
    private final File saveToDir;

    /**
     * Creates the scanner and makes sure the target directory exists.
     *
     * @param fileDownloader queue that receives the (url, target file) pairs
     * @param browser        used to fetch the catalogue pages as HTML
     * @param saveToDir      directory the posters are saved into; created if missing
     * @throws IllegalStateException if {@code saveToDir} cannot be created or is not a directory
     */
    public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveToDir) {
        this.fileDownloader = fileDownloader;
        this.browser = browser;
        this.saveToDir = saveToDir;

        // mkdirs() returns false when the directory already exists, so re-check afterwards.
        // isDirectory() (rather than exists()) also rejects a regular file at that path.
        if (!this.saveToDir.mkdirs() && !this.saveToDir.isDirectory()) {
            throw new IllegalStateException(String.format("dir not found: '%s'", this.saveToDir.getAbsolutePath()));
        }
    }

    /**
     * Scans every catalogue page, from the last page down to page 1, and queues all posters found.
     * Items whose markup does not match the expected layout are logged and skipped, so a single
     * malformed item does not abort the whole scan.
     *
     * @throws IllegalStateException if the number of the last page cannot be determined
     */
    @Override
    public void run() {
        int lastPage = getLastPage();
        for (int page = lastPage; page > 0; page--) {
            logger.info("page #{}...", page);

            // Plain concatenation keeps the page number in ASCII digits whatever the default
            // locale is; String.format("%d") localizes digits.
            String html = browser.get(CATALOG_URL + "page/" + page + "/");
            Document document = Jsoup.parse(html);

            for (Element item : document.getElementsByClass("slider-item")) {
                enqueuePoster(item);
            }
        }
    }

    /**
     * Queues the poster of one {@code slider-item} element.
     *
     * <p>Expected layout: the item's first child carries the film page link in {@code href}; that
     * element's first grandchild (first child of its first child) carries the poster URL in
     * {@code src}. Anything else is logged and skipped.
     */
    private void enqueuePoster(Element item) {
        Element link = firstChild(item);
        if (link == null) {
            logger.warn("slider item without a link, skipped");
            return;
        }

        String href = link.attr("href");
        int nameStart = href.lastIndexOf('/') + 1;
        int nameEnd = href.lastIndexOf('.');
        // Also covers a missing '.', a '.' that only occurs before the last '/', and an empty name.
        if (nameEnd <= nameStart) {
            logger.warn("cannot derive poster name from href '{}', skipped", href);
            return;
        }
        String name = href.substring(nameStart, nameEnd);

        Element wrapper = firstChild(link);
        Element image = wrapper == null ? null : firstChild(wrapper);
        String url = image == null ? "" : image.attr("src");
        if (url.isEmpty()) {
            logger.warn("no poster image for '{}', skipped", href);
            return;
        }

        fileDownloader.addFile(url, new File(saveToDir, name + ".jpg"));
    }

    /** Returns the first child element of {@code parent}, or {@code null} if it has none. */
    private static Element firstChild(Element parent) {
        Elements children = parent.children();
        return children.isEmpty() ? null : children.get(0);
    }

    /**
     * Reads the number of the last catalogue page from the first page's pager.
     *
     * <p>Takes the second-to-last child of the first element with class {@code navigation} and
     * parses the last {@code /}-separated segment of its {@code href} as the page number.
     * NOTE(review): this presumes the last pager entry is a "next" link preceded by the link to
     * the last page; confirm against the live markup.
     *
     * @return the parsed page number
     * @throws IllegalStateException if the pager is missing or does not have the expected shape
     */
    private int getLastPage() {
        String html = browser.get(CATALOG_URL);
        Document document = Jsoup.parse(html);

        Elements navigation = document.getElementsByClass("navigation");
        if (navigation.isEmpty()) {
            throw new IllegalStateException("pager ('navigation' element) not found on " + CATALOG_URL);
        }

        Elements pagerItems = navigation.get(0).children();
        if (pagerItems.size() < 2) {
            throw new IllegalStateException("pager on " + CATALOG_URL + " has too few entries: " + pagerItems.size());
        }

        String href = pagerItems.get(pagerItems.size() - 2).attr("href");
        // split() drops trailing empty strings, so a trailing '/' in the href is harmless.
        String[] parts = href.split("/");
        try {
            return Integer.parseInt(parts[parts.length - 1]);
        } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) {
            throw new IllegalStateException(String.format("cannot read last page number from href '%s'", href), e);
        }
    }
}