0

Загрузчик постеров

This commit is contained in:
2017-04-02 02:08:32 +03:00
parent 6e4a69df97
commit 78fba0f79b
10 changed files with 362 additions and 13 deletions

View File

@@ -1,13 +0,0 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
*/
package kinosearch.kinosearch3;
import org.slf4j.LoggerFactory;
/**
 * Minimal entry point whose only action is to write a greeting to the log.
 */
public class Main {
/**
 * Obtains the SLF4J logger named "main" and logs "hello" at INFO level.
 *
 * @param args command-line arguments; not read
 */
public static void main(String[] args) {
LoggerFactory.getLogger("main").info("hello");
}
}

View File

@@ -0,0 +1,45 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
*/
package kinosearch.kinosearch3.browser;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClients;
import java.io.IOException;
/**
 * {@link Browser} implementation backed by Apache HttpClient.
 *
 * <p>Fetching is best effort: any I/O failure yields an empty string rather
 * than an exception.
 */
public class ApacheBrowser implements Browser {
    /**
     * Single client shared by all requests. Creating a client per call would
     * allocate a new connection manager each time that is never shut down.
     */
    private final HttpClient client = HttpClients.createDefault();

    /** Charset name used to decode response bodies; set via {@link #setEncoding(String)}. */
    private String encoding;

    /**
     * Stores the charset name used to decode subsequent responses.
     *
     * @param encoding charset name passed to {@code IOUtils.toString}
     *                 (NOTE(review): a {@code null} value presumably falls back
     *                 to the platform default charset - confirm against commons-io)
     */
    @Override
    public void setEncoding(String encoding) {
        this.encoding = encoding;
    }

    /**
     * Performs an HTTP GET and returns the response body as text.
     *
     * @param url address to fetch
     * @return the decoded body, or an empty string when the request fails or
     *         the response carries no entity
     */
    @Override
    public String get(String url) {
        HttpGet request = new HttpGet(url);
        setupHeaders(request);
        String result = "";
        try {
            HttpResponse response = client.execute(request);
            // Responses without a body expose a null entity.
            if (response.getEntity() != null) {
                result = IOUtils.toString(response.getEntity().getContent(), encoding);
            }
        } catch (IOException ignored) {
            // Deliberate best effort: callers receive "" when the page cannot be read.
        } finally {
            // Frees the underlying connection even if reading the body failed part-way.
            request.releaseConnection();
        }
        return result;
    }

    /** Adds the fixed request headers (connection policy, accepted encoding, user agent). */
    private void setupHeaders(HttpGet request) {
        request.addHeader("Connection", "close");
        request.addHeader("Accept-Encoding", "deflate");
        request.addHeader("User-Agent", "Mozilla/5.0 (Linux; Android 4.2.2; GT-I9505 Build/JDQ39) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.59 Mobile Safari/537.36");
    }
}

View File

@@ -0,0 +1,10 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
*/
package kinosearch.kinosearch3.browser;
/**
 * Minimal abstraction over an HTTP client that fetches a page as text.
 */
public interface Browser {
/**
 * Sets the character encoding name the implementation should use for responses.
 *
 * @param encoding charset name
 */
void setEncoding(String encoding);
/**
 * Fetches the resource at the given URL.
 *
 * @param url address to request
 * @return the response body as a string
 */
String get(String url);
}

View File

@@ -0,0 +1,68 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
*/
package kinosearch.kinosearch3.cinema.onlinelife;
import kinosearch.kinosearch3.browser.Browser;
import kinosearch.kinosearch3.spider.FileDownloader;
import kinosearch.kinosearch3.spider.ScannerCinema;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
/**
 * {@link ScannerCinema} that walks the catalogue pages under {@link #CATALOG_URL}
 * from the last page down to the first and queues every poster image it finds
 * for download.
 */
public class ScannerImpl implements ScannerCinema {
    /** First catalogue page; numbered pages live under {@code page/<n>/}. */
    private static final String CATALOG_URL = "http://www.online-life.cc/filmy/zarubezh-filmy/";

    private final Logger logger = LoggerFactory.getLogger(ScannerImpl.class);
    private final FileDownloader fileDownloader;
    private final Browser browser;
    private final File saveToDir;

    /**
     * @param fileDownloader queue that receives the poster download requests
     * @param browser        client used to fetch catalogue pages
     * @param saveToDir      directory for downloaded posters; created if missing
     * @throws IllegalStateException if the directory neither exists nor can be created
     */
    public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveToDir) {
        this.fileDownloader = fileDownloader;
        this.browser = browser;
        this.saveToDir = saveToDir;

        if (!this.saveToDir.mkdirs() && !this.saveToDir.exists()) {
            throw new IllegalStateException(String.format("dir not found: '%s'", this.saveToDir.getAbsolutePath()));
        }
    }

    /**
     * Scans all catalogue pages, newest page number first, and enqueues each
     * poster. Cards with unexpected markup are skipped instead of aborting the scan.
     */
    @Override
    public void run() {
        int lastPage = getLastPage();

        for (int page = lastPage; page > 0; page--) {
            logger.info("page #{}...", page);
            // Plain concatenation keeps the page number in ASCII digits regardless of locale.
            String html = browser.get(CATALOG_URL + "page/" + page + "/");
            Document document = Jsoup.parse(html);

            for (Element item : document.getElementsByClass("slider-item")) {
                enqueuePoster(item);
            }
        }
    }

    /**
     * Extracts the film identifier and poster URL from one "slider-item" card
     * and hands them to the downloader. The card's first child is expected to
     * be a link whose href ends in {@code /<name>.<ext>} and whose first
     * grandchild carries the image {@code src}.
     */
    private void enqueuePoster(Element item) {
        if (item.children().isEmpty()) {
            logger.warn("skip item without children");
            return;
        }
        Element link = item.children().get(0);

        String href = link.attr("href");
        int nameStart = href.lastIndexOf('/') + 1;
        int nameEnd = href.lastIndexOf('.');
        if (nameEnd <= nameStart) {
            // No usable "<name>.<ext>" tail; substring would throw or yield an empty name.
            logger.warn("skip item with unexpected href: '{}'", href);
            return;
        }
        String name = href.substring(nameStart, nameEnd);

        Elements wrappers = link.children();
        if (wrappers.isEmpty() || wrappers.get(0).children().isEmpty()) {
            logger.warn("skip item without image: '{}'", href);
            return;
        }
        String url = wrappers.get(0).children().get(0).attr("src");
        if (url.isEmpty()) {
            logger.warn("skip item with empty image src: '{}'", href);
            return;
        }

        this.fileDownloader.addFile(url, new File(saveToDir, name + ".jpg"));
    }

    /**
     * Reads the highest page number from the pager on the first catalogue page.
     * The second-to-last child of the first "navigation" element is taken as
     * the link to the last page, and the final path segment of its href as the
     * page number.
     *
     * @return the last page number, or 0 when the pager is missing or cannot be
     *         parsed (for example when the page could not be fetched), so that
     *         {@link #run()} scans nothing instead of failing
     */
    private int getLastPage() {
        String html = browser.get(CATALOG_URL);
        Document document = Jsoup.parse(html);

        Elements navigation = document.getElementsByClass("navigation");
        if (navigation.isEmpty()) {
            logger.warn("navigation block not found on '{}'", CATALOG_URL);
            return 0;
        }

        Elements links = navigation.get(0).children();
        if (links.size() < 2) {
            logger.warn("navigation block has too few links: {}", links.size());
            return 0;
        }

        String href = links.get(links.size() - 2).attr("href");
        String[] parts = href.split("/");
        if (parts.length == 0) {
            logger.warn("cannot read page number from href '{}'", href);
            return 0;
        }
        try {
            return Integer.parseInt(parts[parts.length - 1]);
        } catch (NumberFormatException e) {
            logger.warn("cannot read page number from href '{}'", href);
            return 0;
        }
    }
}

View File

@@ -0,0 +1,116 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
*/
package kinosearch.kinosearch3.spider;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.channels.FileLock;
import java.util.AbstractMap;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
/**
 * Bounded producer/consumer file downloader.
 *
 * <p>Producers call {@link #addFile(String, File)}; a fixed number of worker
 * threads started by {@link #start()} take entries from the queue and copy
 * each URL's content into its target file. Workers run until interrupted
 * through {@link #stop()}.
 */
public class FileDownloader implements Runnable {
    private final Logger logger = LoggerFactory.getLogger(FileDownloader.class);
    /** Pending (url, target file) pairs; fair ordering, fixed capacity. */
    private final BlockingQueue<Map.Entry<String, File>> listOfEntries;
    private final int threadCount;
    /** Group holding the worker threads; {@code null} until {@link #start()} runs. */
    private ThreadGroup threadGroup;

    /**
     * @param threadCount number of worker threads {@link #start()} creates
     * @param capacity    maximum number of queued downloads
     */
    public FileDownloader(int threadCount, int capacity) {
        this.threadCount = threadCount;
        this.listOfEntries = new ArrayBlockingQueue<>(capacity, true);
    }

    /**
     * Queues a download, blocking while the queue is full. If the calling
     * thread is interrupted while waiting, the entry is dropped and the
     * thread's interrupt flag is restored for the caller to observe.
     *
     * @param url    source address
     * @param saveTo file to write the content to
     */
    public void addFile(String url, File saveTo) {
        try {
            listOfEntries.put(new AbstractMap.SimpleEntry<>(url, saveTo));
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.trace("cancel add file to queue");
        }
    }

    /** Creates the thread group and starts {@code threadCount} worker threads. */
    void start() {
        this.threadGroup = new ThreadGroup("FileDownloader");

        for (int i = 1; i <= this.threadCount; i++) {
            String threadName = "Downloader #" + i;
            logger.debug("Start '{}' thread", threadName);
            (new Thread(threadGroup, this, threadName)).start();
        }
    }

    /** Interrupts all worker threads; does nothing if {@link #start()} was never called. */
    public void stop() {
        logger.trace("Stopped threads");
        if (this.threadGroup != null) {
            this.threadGroup.interrupt();
        }
    }

    /** Worker loop: takes queued entries and downloads them until the thread is interrupted. */
    @Override
    public void run() {
        while (!Thread.currentThread().isInterrupted()) {
            Map.Entry<String, File> entry;
            try {
                entry = listOfEntries.take();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                logger.trace("cancel take file by queue");
                break;
            }

            logger.debug("take url for file '{}'", entry.getValue().getName());
            download(entry.getKey(), entry.getValue());
        }

        logger.debug("Thread closed");
    }

    /**
     * Copies the content at {@code address} into {@code target} while holding
     * an exclusive lock on the file. All streams and the lock are released on
     * every path; failures are logged and do not terminate the worker.
     */
    private void download(String address, File target) {
        // Resources close in reverse order: the lock is released before the stream is closed.
        try (FileOutputStream fos = new FileOutputStream(target);
             FileLock lock = fos.getChannel().lock()) {
            URLConnection urlConnection = new URL(address).openConnection();
            try (InputStream in = urlConnection.getInputStream()) {
                IOUtils.copy(in, fos);
            }
            logger.debug("file downloaded: '{}'", target.getName());
        } catch (IOException | RuntimeException e) {
            // Unchecked failures (e.g. a lock already held in this JVM) must not kill the worker.
            logger.error("failed to download '{}' to '{}'", address, target, e);
        }
    }
}

View File

@@ -0,0 +1,16 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
*/
package kinosearch.kinosearch3.spider;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
/**
 * Command-line entry point of the spider: loads the Spring XML context from
 * the classpath and starts the bean named "spider".
 */
public class Main {
    /**
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        final String contextLocation = "/kinosearch/kinosearch3/spider/spring.xml";
        ApplicationContext context = new ClassPathXmlApplicationContext(contextLocation);
        context.getBean("spider", Spider.class).start();
    }
}

View File

@@ -0,0 +1,9 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
*/
package kinosearch.kinosearch3.spider;
/**
 * A scanner for one cinema site. {@code Spider.start()} invokes {@link #run()}
 * on each configured scanner in list order.
 */
public interface ScannerCinema {
/**
 * Performs the scan. Declares no checked exceptions and returns nothing.
 */
void run();
}

View File

@@ -0,0 +1,23 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-02
*/
package kinosearch.kinosearch3.spider;
import java.util.Collections;
import java.util.List;
/**
 * Coordinates a crawl: starts the download workers, then runs every
 * configured scanner one after another on the calling thread.
 */
public class Spider {
    private final List<ScannerCinema> scanners;
    private final FileDownloader fileDownloader;

    /**
     * @param scanners       scanners to run; kept behind an unmodifiable view
     * @param fileDownloader downloader started before any scanner runs
     */
    public Spider(List<ScannerCinema> scanners, FileDownloader fileDownloader) {
        this.fileDownloader = fileDownloader;
        this.scanners = Collections.unmodifiableList(scanners);
    }

    /** Starts the downloader, then invokes each scanner in list order. */
    void start() {
        fileDownloader.start();
        for (ScannerCinema scanner : scanners) {
            scanner.run();
        }
    }
}