0

Загрузчик постеров

This commit is contained in:
2017-04-02 02:08:32 +03:00
parent 6e4a69df97
commit 78fba0f79b
10 changed files with 362 additions and 13 deletions

41
pom.xml
View File

@@ -20,6 +20,7 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.8</java.version> <java.version>1.8</java.version>
<slf4j.version>1.7.21</slf4j.version> <slf4j.version>1.7.21</slf4j.version>
<spring.version>4.2.5.RELEASE</spring.version>
</properties> </properties>
<dependencies> <dependencies>
@@ -29,12 +30,52 @@
<artifactId>slf4j-api</artifactId> <artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version> <version>${slf4j.version}</version>
</dependency> </dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
<version>${slf4j.version}</version>
</dependency>
<dependency> <dependency>
<groupId>org.slf4j</groupId> <groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId> <artifactId>slf4j-simple</artifactId>
<version>${slf4j.version}</version> <version>${slf4j.version}</version>
<scope>runtime</scope> <scope>runtime</scope>
</dependency> </dependency>
<!-- SPRING -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-core</artifactId>
<version>${spring.version}</version>
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-context</artifactId>
<version>${spring.version}</version>
</dependency>
<!-- COMPONENTS -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.2</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.3</version>
</dependency>
</dependencies> </dependencies>
<build> <build>

View File

@@ -1,13 +0,0 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
*/
package kinosearch.kinosearch3;
import org.slf4j.LoggerFactory;
/**
 * Minimal entry point: writes a single greeting through SLF4J and exits.
 */
public class Main {

    /**
     * Obtains the logger named {@code "main"} and emits {@code "hello"} at INFO level.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        LoggerFactory
                .getLogger("main")
                .info("hello");
    }
}

View File

@@ -0,0 +1,45 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
*/
package kinosearch.kinosearch3.browser;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClients;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
 * {@link Browser} implementation backed by Apache HttpClient.
 *
 * <p>A single {@link HttpClient} is created per browser instance and reused for every
 * request, instead of building (and never closing) a fresh client with its own connection
 * pool on each call. Responses are consumed through a response handler, so the underlying
 * connection is released by the client whether or not reading the body succeeds.
 */
public class ApacheBrowser implements Browser {
    private static final Logger LOGGER = LoggerFactory.getLogger(ApacheBrowser.class);

    /** User-Agent sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Linux; Android 4.2.2; GT-I9505 Build/JDQ39) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.59 Mobile Safari/537.36";

    /** Shared by all requests issued through this browser instance. */
    private final HttpClient client = HttpClients.createDefault();

    /** Charset name used to decode response bodies; {@code null} falls back to the platform default. */
    private String encoding;

    /**
     * Sets the charset name used to decode response bodies.
     *
     * @param encoding charset name such as {@code "utf-8"}; may be {@code null}
     */
    @Override
    public void setEncoding(String encoding) {
        this.encoding = encoding;
    }

    /**
     * Performs an HTTP GET and returns the response body decoded with the configured encoding.
     *
     * <p>This is a best-effort call: an I/O failure is logged and reported to the caller as an
     * empty string rather than as an exception.
     *
     * @param url address to fetch
     * @return the decoded body, or {@code ""} when the request failed or the response had no body
     */
    @Override
    public String get(String url) {
        HttpGet request = new HttpGet(url);
        setupHeaders(request);
        try {
            // The handler variant of execute() releases the connection in all cases.
            return client.execute(request, this::readBody);
        } catch (IOException e) {
            LOGGER.warn("GET '{}' failed", url, e);
            return "";
        }
    }

    /** Decodes the response entity, treating a missing entity as an empty body. */
    private String readBody(HttpResponse response) throws IOException {
        if (response.getEntity() == null) {
            return "";
        }
        return IOUtils.toString(response.getEntity().getContent(), encoding);
    }

    /** Adds the fixed request headers sent with every GET. */
    private void setupHeaders(HttpGet request) {
        request.addHeader("Connection", "close");
        request.addHeader("Accept-Encoding", "deflate");
        request.addHeader("User-Agent", USER_AGENT);
    }
}

View File

@@ -0,0 +1,10 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
*/
package kinosearch.kinosearch3.browser;
/**
 * Minimal text-fetching client: retrieves the content behind a URL as a string.
 */
public interface Browser {

    /**
     * Fetches the content behind the given address.
     *
     * @param url address to fetch
     * @return the fetched content as text
     */
    String get(String url);

    /**
     * Sets the name of the character encoding associated with this browser.
     *
     * @param encoding charset name, for example {@code "utf-8"}
     */
    void setEncoding(String encoding);
}

View File

@@ -0,0 +1,68 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
*/
package kinosearch.kinosearch3.cinema.onlinelife;
import kinosearch.kinosearch3.browser.Browser;
import kinosearch.kinosearch3.spider.FileDownloader;
import kinosearch.kinosearch3.spider.ScannerCinema;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
/**
 * {@link ScannerCinema} for the online-life.cc film catalogue.
 *
 * <p>Walks the catalogue pages from the last one down to the first and queues every poster
 * image found on them for download into {@code saveToDir}. Pages or items whose markup does
 * not have the expected shape are skipped with a warning instead of aborting the whole scan.
 */
public class ScannerImpl implements ScannerCinema {
    private static final String CATALOG_URL = "http://www.online-life.cc/filmy/zarubezh-filmy/";
    private static final String PAGE_URL_FORMAT = CATALOG_URL + "page/%d/";

    private final Logger logger = LoggerFactory.getLogger(ScannerImpl.class);
    private final FileDownloader fileDownloader;
    private final Browser browser;
    private final File saveToDir;

    /**
     * Creates the scanner and makes sure the target directory exists.
     *
     * @param fileDownloader queue that receives the poster download requests
     * @param browser        client used to fetch the catalogue pages
     * @param saveToDir      directory the posters are saved into; created if missing
     * @throws IllegalStateException if the directory does not exist and cannot be created
     */
    public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveToDir) {
        this.fileDownloader = fileDownloader;
        this.browser = browser;
        this.saveToDir = saveToDir;
        if (!this.saveToDir.mkdirs() && !this.saveToDir.exists()) {
            throw new IllegalStateException(String.format("dir not found: '%s'", this.saveToDir.getAbsolutePath()));
        }
    }

    /**
     * Scans every catalogue page, last page first, and queues the posters found on each.
     * Does nothing when the number of pages cannot be determined.
     */
    @Override
    public void run() {
        for (int page = getLastPage(); page > 0; page--) {
            logger.info("page #{}...", page);
            String html = browser.get(String.format(PAGE_URL_FORMAT, page));
            for (Element item : Jsoup.parse(html).getElementsByClass("slider-item")) {
                enqueuePoster(item);
            }
        }
    }

    /**
     * Queues the poster of one {@code slider-item} element.
     *
     * <p>Expected shape: the first child carries the film link in {@code href}, and the first
     * grandchild of that link carries the image address in {@code src}. The file name is the
     * part of the link between its last {@code '/'} and its last {@code '.'}.
     */
    private void enqueuePoster(Element item) {
        Elements itemChildren = item.children();
        if (itemChildren.isEmpty()) {
            logger.warn("skip item without link element");
            return;
        }
        Element link = itemChildren.get(0);

        String href = link.attr("href");
        int nameStart = href.lastIndexOf("/") + 1;
        int nameEnd = href.lastIndexOf(".");
        if (nameEnd <= nameStart) {
            // No "<name>.<ext>" tail: substring() would throw or yield an empty file name.
            logger.warn("skip item, cannot derive file name from href '{}'", href);
            return;
        }
        String name = href.substring(nameStart, nameEnd);

        Elements wrappers = link.children();
        if (wrappers.isEmpty() || wrappers.get(0).children().isEmpty()) {
            logger.warn("skip item '{}', poster image element not found", name);
            return;
        }
        String url = wrappers.get(0).children().get(0).attr("src");
        if (url.isEmpty()) {
            logger.warn("skip item '{}', poster image has no src", name);
            return;
        }

        this.fileDownloader.addFile(url, new File(saveToDir, name + ".jpg"));
    }

    /**
     * Reads the number of the last catalogue page from the pager on the first page.
     *
     * <p>The number is the last path segment of the {@code href} of the second-to-last child
     * of the first {@code navigation} element.
     *
     * @return the last page number, or {@code 0} when the pager is missing or malformed
     *         (for instance because the page could not be fetched)
     */
    private int getLastPage() {
        Document document = Jsoup.parse(browser.get(CATALOG_URL));
        Elements navigation = document.getElementsByClass("navigation");
        if (navigation.isEmpty()) {
            logger.warn("navigation block not found at '{}', nothing to scan", CATALOG_URL);
            return 0;
        }
        Elements links = navigation.get(0).children();
        if (links.size() < 2) {
            logger.warn("navigation block at '{}' has too few entries, nothing to scan", CATALOG_URL);
            return 0;
        }
        String href = links.get(links.size() - 2).attr("href");
        String[] parts = href.split("/");
        if (parts.length == 0) {
            logger.warn("cannot read last page number from href '{}'", href);
            return 0;
        }
        try {
            return Integer.parseInt(parts[parts.length - 1]);
        } catch (NumberFormatException e) {
            logger.warn("cannot read last page number from href '{}'", href);
            return 0;
        }
    }
}

View File

@@ -0,0 +1,116 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
*/
package kinosearch.kinosearch3.spider;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.channels.FileLock;
import java.util.AbstractMap;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
/**
 * Bounded download queue served by a fixed number of worker threads.
 *
 * <p>Producers hand over {@code (url, target file)} pairs through {@link #addFile}; each worker
 * started by {@link #start()} repeatedly takes a pair from the queue and copies the URL content
 * into the file. {@link #stop()} interrupts all workers.
 */
public class FileDownloader implements Runnable {
    private final Logger logger = LoggerFactory.getLogger(FileDownloader.class);
    private final BlockingQueue<Map.Entry<String, File>> listOfEntries;
    private final int threadCount;
    private ThreadGroup threadGroup;

    /**
     * @param threadCount number of worker threads launched by {@link #start()}
     * @param capacity    maximum number of pending downloads held in the queue
     */
    public FileDownloader(int threadCount, int capacity) {
        this.threadCount = threadCount;
        this.listOfEntries = new ArrayBlockingQueue<>(capacity, true);
    }

    /**
     * Queues a download, blocking while the queue is full.
     *
     * <p>If the calling thread is interrupted while waiting, the request is dropped and the
     * thread's interrupt flag is restored so the caller can notice the cancellation.
     *
     * @param url    address of the file to download
     * @param saveTo file the content is written to
     */
    public void addFile(String url, File saveTo) {
        try {
            listOfEntries.put(new AbstractMap.SimpleEntry<>(url, saveTo));
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.trace("cancel add file to queue");
        }
    }

    /** Launches {@code threadCount} worker threads in a fresh thread group. */
    void start() {
        ThreadGroup group = new ThreadGroup("FileDownloader");
        this.threadGroup = group;
        for (int i = 1; i <= this.threadCount; i++) {
            String name = "Downloader #" + i;
            logger.debug("Start '{}' thread", name);
            new Thread(group, this, name).start();
        }
    }

    /** Interrupts all worker threads; a no-op when {@link #start()} was never called. */
    public void stop() {
        if (this.threadGroup == null) {
            return;
        }
        logger.trace("Stopped threads");
        this.threadGroup.interrupt();
    }

    /**
     * Worker loop: takes queued entries and downloads them until the thread is interrupted.
     * A failed download is logged and does not end the loop.
     */
    @Override
    public void run() {
        while (!Thread.currentThread().isInterrupted()) {
            Map.Entry<String, File> entry;
            try {
                entry = listOfEntries.take();
            } catch (InterruptedException e) {
                logger.trace("cancel take file by queue");
                return;
            }
            logger.debug("take url for file '{}'", entry.getValue().getName());
            download(entry.getKey(), entry.getValue());
        }
        logger.trace("Thred closed");
    }

    /**
     * Copies the content behind {@code address} into {@code target} while holding a lock on the
     * target file. The output stream, the file lock and the input stream are all closed when
     * the copy ends, whether it succeeded or not.
     */
    private void download(String address, File target) {
        try (FileOutputStream fos = new FileOutputStream(target);
             FileLock ignored = fos.getChannel().lock()) {
            URLConnection connection = new URL(address).openConnection();
            try (java.io.InputStream in = connection.getInputStream()) {
                IOUtils.copy(in, fos);
            }
            logger.debug("file downloaded: '{}'", target.getName());
        } catch (IOException | RuntimeException e) {
            // Unchecked failures (e.g. an overlapping lock on the same file) are caught too,
            // so that one bad entry cannot kill the worker thread.
            logger.error("failed to download '{}' to '{}'", address, target, e);
        }
    }
}

View File

@@ -0,0 +1,16 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
*/
package kinosearch.kinosearch3.spider;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
/**
 * Entry point of the spider: builds the Spring context and starts the {@code spider} bean.
 */
public class Main {

    /**
     * Loads the bean definitions from the classpath and starts the spider.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        final String configLocation = "/kinosearch/kinosearch3/spider/spring.xml";
        ApplicationContext context = new ClassPathXmlApplicationContext(configLocation);
        context.getBean("spider", Spider.class).start();
    }
}

View File

@@ -0,0 +1,9 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
*/
package kinosearch.kinosearch3.spider;
/**
 * A single cinema-site scanner that the spider can execute.
 */
public interface ScannerCinema {

    /**
     * Performs one complete scan. Takes no arguments and returns nothing; any result is
     * delivered through side effects of the implementation.
     */
    void run();
}

View File

@@ -0,0 +1,23 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-02
*/
package kinosearch.kinosearch3.spider;
import java.util.Collections;
import java.util.List;
/**
 * Coordinates a scan: starts the file downloader, then runs every configured scanner.
 */
public class Spider {
    private final List<ScannerCinema> scanners;
    private final FileDownloader fileDownloader;

    /**
     * @param scanners       scanners to execute, in order; exposed internally as a read-only view
     * @param fileDownloader downloader started before the scanners run
     */
    public Spider(List<ScannerCinema> scanners, FileDownloader fileDownloader) {
        this.fileDownloader = fileDownloader;
        this.scanners = Collections.unmodifiableList(scanners);
    }

    /**
     * Starts the downloader and then runs each scanner sequentially on the calling thread.
     * The downloader is not stopped here.
     */
    void start() {
        fileDownloader.start();
        for (ScannerCinema scanner : scanners) {
            scanner.run();
        }
    }
}

View File

@@ -0,0 +1,34 @@
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-3.0.xsd">
<!-- Bean wiring for the poster spider: one shared downloader, one scanner, one spider. -->
<!-- Download queue: 3 worker threads, at most 100 pending downloads. -->
<bean id="fileDownloader" class="kinosearch.kinosearch3.spider.FileDownloader">
<constructor-arg name="threadCount" value="3"/>
<constructor-arg name="capacity" value="100"/>
</bean>
<!-- Browser template: prototype scope, decoding responses as utf-8 unless overridden. -->
<bean id="browser" class="kinosearch.kinosearch3.browser.ApacheBrowser" scope="prototype">
<property name="encoding" value="utf-8"/>
</bean>
<!-- Scanner for the onlinelife site; shares the downloader with the spider bean. -->
<bean id="onlinelifeScanner" class="kinosearch.kinosearch3.cinema.onlinelife.ScannerImpl">
<constructor-arg name="fileDownloader" ref="fileDownloader"/>
<constructor-arg name="browser">
<!-- Inner bean derived from "browser" with the encoding overridden to windows-1251. -->
<bean parent="browser">
<property name="encoding" value="windows-1251"/>
</bean>
</constructor-arg>
<!-- NOTE(review): hard-coded target directory on drive R: - confirm before running elsewhere. -->
<constructor-arg name="saveToDir" value="file:R:/onlinelife"/>
</bean>
<!-- Entry bean looked up by Main: starts the downloader, then runs the listed scanners. -->
<bean id="spider" class="kinosearch.kinosearch3.spider.Spider">
<constructor-arg name="scanners">
<list>
<ref bean="onlinelifeScanner"/>
</list>
</constructor-arg>
<constructor-arg name="fileDownloader" ref="fileDownloader"/>
</bean>
</beans>