Загрузчик постеров
This commit is contained in:
41
pom.xml
41
pom.xml
@@ -20,6 +20,7 @@
|
|||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
<java.version>1.8</java.version>
|
<java.version>1.8</java.version>
|
||||||
<slf4j.version>1.7.21</slf4j.version>
|
<slf4j.version>1.7.21</slf4j.version>
|
||||||
|
<spring.version>4.2.5.RELEASE</spring.version>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
@@ -29,12 +30,52 @@
|
|||||||
<artifactId>slf4j-api</artifactId>
|
<artifactId>slf4j-api</artifactId>
|
||||||
<version>${slf4j.version}</version>
|
<version>${slf4j.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>jcl-over-slf4j</artifactId>
|
||||||
|
<version>${slf4j.version}</version>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.slf4j</groupId>
|
<groupId>org.slf4j</groupId>
|
||||||
<artifactId>slf4j-simple</artifactId>
|
<artifactId>slf4j-simple</artifactId>
|
||||||
<version>${slf4j.version}</version>
|
<version>${slf4j.version}</version>
|
||||||
<scope>runtime</scope>
|
<scope>runtime</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- SPRING -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.springframework</groupId>
|
||||||
|
<artifactId>spring-core</artifactId>
|
||||||
|
<version>${spring.version}</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>commons-logging</groupId>
|
||||||
|
<artifactId>commons-logging</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.springframework</groupId>
|
||||||
|
<artifactId>spring-context</artifactId>
|
||||||
|
<version>${spring.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- COMPONENTS -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-io</groupId>
|
||||||
|
<artifactId>commons-io</artifactId>
|
||||||
|
<version>2.5</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpclient</artifactId>
|
||||||
|
<version>4.5.2</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.jsoup</groupId>
|
||||||
|
<artifactId>jsoup</artifactId>
|
||||||
|
<version>1.8.3</version>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
|||||||
@@ -1,13 +0,0 @@
|
|||||||
/*
|
|
||||||
* DmitriyMX <dimon550@gmail.com>
|
|
||||||
* 2017-04-01
|
|
||||||
*/
|
|
||||||
package kinosearch.kinosearch3;
|
|
||||||
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
public class Main {
|
|
||||||
public static void main(String[] args) {
|
|
||||||
LoggerFactory.getLogger("main").info("hello");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
/*
|
||||||
|
* DmitriyMX <dimon550@gmail.com>
|
||||||
|
* 2017-04-01
|
||||||
|
*/
|
||||||
|
package kinosearch.kinosearch3.browser;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
|
||||||
|
|
||||||
|
public class ApacheBrowser implements Browser {
|
||||||
|
private String encoding;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setEncoding(String encoding) {
|
||||||
|
this.encoding = encoding;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String get(String url) {
|
||||||
|
HttpClient client = HttpClients.createDefault();
|
||||||
|
HttpGet request = new HttpGet(url);
|
||||||
|
setup_headers(request);
|
||||||
|
|
||||||
|
String result = "";
|
||||||
|
try {
|
||||||
|
HttpResponse response = client.execute(request);
|
||||||
|
result = IOUtils.toString(response.getEntity().getContent(), encoding);
|
||||||
|
} catch (IOException ignore) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setup_headers(HttpGet request) {
|
||||||
|
request.addHeader("Connection", "close");
|
||||||
|
request.addHeader("Accept-Encoding", "deflate");
|
||||||
|
request.addHeader("User-Agent", "Mozilla/5.0 (Linux; Android 4.2.2; GT-I9505 Build/JDQ39) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.59 Mobile Safari/537.36");
|
||||||
|
}
|
||||||
|
}
|
||||||
10
src/main/java/kinosearch/kinosearch3/browser/Browser.java
Normal file
10
src/main/java/kinosearch/kinosearch3/browser/Browser.java
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
/*
|
||||||
|
* DmitriyMX <dimon550@gmail.com>
|
||||||
|
* 2017-04-01
|
||||||
|
*/
|
||||||
|
package kinosearch.kinosearch3.browser;
|
||||||
|
|
||||||
|
/**
 * Abstraction over a component that fetches the content behind a URL as text.
 */
public interface Browser {
    /**
     * Sets the character encoding used when turning fetched content into a string.
     *
     * @param encoding charset name
     */
    void setEncoding(String encoding);

    /**
     * Fetches the content located at the given URL.
     *
     * @param url address to fetch
     * @return the fetched content as a string
     */
    String get(String url);
}
|
||||||
@@ -0,0 +1,68 @@
|
|||||||
|
/*
|
||||||
|
* DmitriyMX <dimon550@gmail.com>
|
||||||
|
* 2017-04-01
|
||||||
|
*/
|
||||||
|
package kinosearch.kinosearch3.cinema.onlinelife;
|
||||||
|
|
||||||
|
import kinosearch.kinosearch3.browser.Browser;
|
||||||
|
import kinosearch.kinosearch3.spider.FileDownloader;
|
||||||
|
import kinosearch.kinosearch3.spider.ScannerCinema;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
public class ScannerImpl implements ScannerCinema {
|
||||||
|
private Logger logger = LoggerFactory.getLogger(ScannerImpl.class);
|
||||||
|
private FileDownloader fileDownloader;
|
||||||
|
private Browser browser;
|
||||||
|
private File saveToDir;
|
||||||
|
|
||||||
|
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveToDir) {
|
||||||
|
this.fileDownloader = fileDownloader;
|
||||||
|
this.browser = browser;
|
||||||
|
this.saveToDir = saveToDir;
|
||||||
|
if (!this.saveToDir.mkdirs() && !this.saveToDir.exists()) {
|
||||||
|
throw new IllegalStateException(String.format("dir not found: '%s'", this.saveToDir.getAbsolutePath()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
int lastPage = getLastPage();
|
||||||
|
|
||||||
|
for (int page = lastPage; page > 0; page--) {
|
||||||
|
logger.info("page #{}...", page);
|
||||||
|
String html = browser.get(String.format("http://www.online-life.cc/filmy/zarubezh-filmy/page/%d/", page));
|
||||||
|
Document document = Jsoup.parse(html);
|
||||||
|
Elements elements = document.getElementsByClass("slider-item");
|
||||||
|
|
||||||
|
elements.stream()
|
||||||
|
.map(element -> element.children().get(0))
|
||||||
|
.forEach(element -> {
|
||||||
|
String str = element.attr("href");
|
||||||
|
str = str.substring(str.lastIndexOf("/") + 1, str.lastIndexOf("."));
|
||||||
|
|
||||||
|
String url = element.children().get(0).children().get(0).attr("src");
|
||||||
|
|
||||||
|
this.fileDownloader.addFile(url, new File(saveToDir, str + ".jpg"));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getLastPage() {
|
||||||
|
String html = browser.get("http://www.online-life.cc/filmy/zarubezh-filmy/");
|
||||||
|
|
||||||
|
Document document = Jsoup.parse(html);
|
||||||
|
Elements elements = document.getElementsByClass("navigation");
|
||||||
|
elements = elements.get(0).children();
|
||||||
|
Element element = elements.get(elements.size() - 2);
|
||||||
|
|
||||||
|
String[] parts = element.attr("href").split("/");
|
||||||
|
return Integer.parseInt(parts[parts.length-1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
116
src/main/java/kinosearch/kinosearch3/spider/FileDownloader.java
Normal file
116
src/main/java/kinosearch/kinosearch3/spider/FileDownloader.java
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
/*
|
||||||
|
* DmitriyMX <dimon550@gmail.com>
|
||||||
|
* 2017-04-01
|
||||||
|
*/
|
||||||
|
package kinosearch.kinosearch3.spider;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.net.URLConnection;
|
||||||
|
import java.nio.channels.FileLock;
|
||||||
|
import java.util.AbstractMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.ArrayBlockingQueue;
|
||||||
|
import java.util.concurrent.BlockingQueue;
|
||||||
|
|
||||||
|
public class FileDownloader implements Runnable {
|
||||||
|
private Logger logger = LoggerFactory.getLogger(FileDownloader.class);
|
||||||
|
private final BlockingQueue<Map.Entry<String,File>> listOfEntries;
|
||||||
|
private final int threadCount;
|
||||||
|
private ThreadGroup threadGroup;
|
||||||
|
|
||||||
|
public FileDownloader(int threadCount, int capacity) {
|
||||||
|
this.threadCount = threadCount;
|
||||||
|
this.listOfEntries = new ArrayBlockingQueue<>(capacity, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addFile(String url, File saveTo) {
|
||||||
|
try {
|
||||||
|
listOfEntries.put(new AbstractMap.SimpleEntry<>(url, saveTo));
|
||||||
|
} catch (InterruptedException ignore) {
|
||||||
|
if(logger.isTraceEnabled()) {
|
||||||
|
logger.trace("cancel add file to queue");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void start() {
|
||||||
|
this.threadGroup = new ThreadGroup("FileDownloader");
|
||||||
|
for (int i = 1; i <= this.threadCount; i++) {
|
||||||
|
if (logger.isDebugEnabled()) {
|
||||||
|
logger.debug("Start '{}' thread", "Downloader #"+i);
|
||||||
|
}
|
||||||
|
(new Thread(threadGroup, this, "Downloader #"+i)).start();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void stop() {
|
||||||
|
if (logger.isTraceEnabled()) {
|
||||||
|
logger.trace("Stopped threads");
|
||||||
|
}
|
||||||
|
this.threadGroup.interrupt();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
|
while (!Thread.currentThread().isInterrupted()) {
|
||||||
|
Map.Entry<String, File> entry;
|
||||||
|
try {
|
||||||
|
entry = listOfEntries.take();
|
||||||
|
if (logger.isDebugEnabled()) {
|
||||||
|
logger.debug("take url for file '{}'", entry.getValue().getName());
|
||||||
|
}
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
if (logger.isTraceEnabled()) {
|
||||||
|
logger.trace("cancel take file by queue");
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
FileLock fileLock = null;
|
||||||
|
URLConnection urlConnection = null;
|
||||||
|
try {
|
||||||
|
FileOutputStream fos = new FileOutputStream(entry.getValue());
|
||||||
|
fileLock = fos.getChannel().lock();
|
||||||
|
|
||||||
|
URL url = new URL(entry.getKey());
|
||||||
|
urlConnection = url.openConnection();
|
||||||
|
|
||||||
|
IOUtils.copy(urlConnection.getInputStream(), fos);
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.error("", e);
|
||||||
|
} finally {
|
||||||
|
if (fileLock != null) {
|
||||||
|
try {
|
||||||
|
fileLock.release();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (urlConnection != null) {
|
||||||
|
try {
|
||||||
|
urlConnection.getInputStream().close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (logger.isDebugEnabled()) {
|
||||||
|
logger.trace("file downloaded: '{}'", entry.getValue().getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (logger.isTraceEnabled()) {
|
||||||
|
logger.debug("Thred closed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
16
src/main/java/kinosearch/kinosearch3/spider/Main.java
Normal file
16
src/main/java/kinosearch/kinosearch3/spider/Main.java
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
/*
|
||||||
|
* DmitriyMX <dimon550@gmail.com>
|
||||||
|
* 2017-04-01
|
||||||
|
*/
|
||||||
|
package kinosearch.kinosearch3.spider;
|
||||||
|
|
||||||
|
import org.springframework.context.ApplicationContext;
|
||||||
|
import org.springframework.context.support.ClassPathXmlApplicationContext;
|
||||||
|
|
||||||
|
public class Main {
|
||||||
|
public static void main(String[] args) {
|
||||||
|
ApplicationContext ctx = new ClassPathXmlApplicationContext("/kinosearch/kinosearch3/spider/spring.xml");
|
||||||
|
Spider spider = ctx.getBean("spider", Spider.class);
|
||||||
|
spider.start();
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
/*
|
||||||
|
* DmitriyMX <dimon550@gmail.com>
|
||||||
|
* 2017-04-01
|
||||||
|
*/
|
||||||
|
package kinosearch.kinosearch3.spider;
|
||||||
|
|
||||||
|
/**
 * A scanner for one cinema site; {@link Spider} invokes {@link #run()} on each configured scanner.
 */
public interface ScannerCinema {
    /** Performs the scan. */
    void run();
}
|
||||||
23
src/main/java/kinosearch/kinosearch3/spider/Spider.java
Normal file
23
src/main/java/kinosearch/kinosearch3/spider/Spider.java
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
/*
|
||||||
|
* DmitriyMX <dimon550@gmail.com>
|
||||||
|
* 2017-04-02
|
||||||
|
*/
|
||||||
|
package kinosearch.kinosearch3.spider;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class Spider {
|
||||||
|
private List<ScannerCinema> scanners;
|
||||||
|
private FileDownloader fileDownloader;
|
||||||
|
|
||||||
|
public Spider(List<ScannerCinema> scanners, FileDownloader fileDownloader) {
|
||||||
|
this.scanners = Collections.unmodifiableList(scanners);
|
||||||
|
this.fileDownloader = fileDownloader;
|
||||||
|
}
|
||||||
|
|
||||||
|
void start() {
|
||||||
|
fileDownloader.start();
|
||||||
|
scanners.forEach(ScannerCinema::run);
|
||||||
|
}
|
||||||
|
}
|
||||||
34
src/main/resources/kinosearch/kinosearch3/spider/spring.xml
Normal file
34
src/main/resources/kinosearch/kinosearch3/spider/spring.xml
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<beans xmlns="http://www.springframework.org/schema/beans"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://www.springframework.org/schema/beans
|
||||||
|
http://www.springframework.org/schema/beans/spring-beans-3.0.xsd">
|
||||||
|
|
||||||
|
<bean id="fileDownloader" class="kinosearch.kinosearch3.spider.FileDownloader">
|
||||||
|
<constructor-arg name="threadCount" value="3"/>
|
||||||
|
<constructor-arg name="capacity" value="100"/>
|
||||||
|
</bean>
|
||||||
|
|
||||||
|
<bean id="browser" class="kinosearch.kinosearch3.browser.ApacheBrowser" scope="prototype">
|
||||||
|
<property name="encoding" value="utf-8"/>
|
||||||
|
</bean>
|
||||||
|
|
||||||
|
<bean id="onlinelifeScanner" class="kinosearch.kinosearch3.cinema.onlinelife.ScannerImpl">
|
||||||
|
<constructor-arg name="fileDownloader" ref="fileDownloader"/>
|
||||||
|
<constructor-arg name="browser">
|
||||||
|
<bean parent="browser">
|
||||||
|
<property name="encoding" value="windows-1251"/>
|
||||||
|
</bean>
|
||||||
|
</constructor-arg>
|
||||||
|
<constructor-arg name="saveToDir" value="file:R:/onlinelife"/>
|
||||||
|
</bean>
|
||||||
|
|
||||||
|
<bean id="spider" class="kinosearch.kinosearch3.spider.Spider">
|
||||||
|
<constructor-arg name="scanners">
|
||||||
|
<list>
|
||||||
|
<ref bean="onlinelifeScanner"/>
|
||||||
|
</list>
|
||||||
|
</constructor-arg>
|
||||||
|
<constructor-arg name="fileDownloader" ref="fileDownloader"/>
|
||||||
|
</bean>
|
||||||
|
</beans>
|
||||||
Reference in New Issue
Block a user