Загрузчик постеров
This commit is contained in:
41
pom.xml
41
pom.xml
@@ -20,6 +20,7 @@
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<java.version>1.8</java.version>
|
||||
<slf4j.version>1.7.21</slf4j.version>
|
||||
<spring.version>4.2.5.RELEASE</spring.version>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
@@ -29,12 +30,52 @@
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
<version>${slf4j.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>jcl-over-slf4j</artifactId>
|
||||
<version>${slf4j.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-simple</artifactId>
|
||||
<version>${slf4j.version}</version>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- SPRING -->
|
||||
<dependency>
|
||||
<groupId>org.springframework</groupId>
|
||||
<artifactId>spring-core</artifactId>
|
||||
<version>${spring.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework</groupId>
|
||||
<artifactId>spring-context</artifactId>
|
||||
<version>${spring.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- COMPONENTS -->
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
<version>2.5</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpclient</artifactId>
|
||||
<version>4.5.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.jsoup</groupId>
|
||||
<artifactId>jsoup</artifactId>
|
||||
<version>1.8.3</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
/*
|
||||
* DmitriyMX <dimon550@gmail.com>
|
||||
* 2017-04-01
|
||||
*/
|
||||
package kinosearch.kinosearch3;
|
||||
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class Main {
|
||||
public static void main(String[] args) {
|
||||
LoggerFactory.getLogger("main").info("hello");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* DmitriyMX <dimon550@gmail.com>
|
||||
* 2017-04-01
|
||||
*/
|
||||
package kinosearch.kinosearch3.browser;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
|
||||
|
||||
public class ApacheBrowser implements Browser {
|
||||
private String encoding;
|
||||
|
||||
@Override
|
||||
public void setEncoding(String encoding) {
|
||||
this.encoding = encoding;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String get(String url) {
|
||||
HttpClient client = HttpClients.createDefault();
|
||||
HttpGet request = new HttpGet(url);
|
||||
setup_headers(request);
|
||||
|
||||
String result = "";
|
||||
try {
|
||||
HttpResponse response = client.execute(request);
|
||||
result = IOUtils.toString(response.getEntity().getContent(), encoding);
|
||||
} catch (IOException ignore) {
|
||||
// ignore
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private void setup_headers(HttpGet request) {
|
||||
request.addHeader("Connection", "close");
|
||||
request.addHeader("Accept-Encoding", "deflate");
|
||||
request.addHeader("User-Agent", "Mozilla/5.0 (Linux; Android 4.2.2; GT-I9505 Build/JDQ39) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.59 Mobile Safari/537.36");
|
||||
}
|
||||
}
|
||||
10
src/main/java/kinosearch/kinosearch3/browser/Browser.java
Normal file
10
src/main/java/kinosearch/kinosearch3/browser/Browser.java
Normal file
@@ -0,0 +1,10 @@
|
||||
/*
|
||||
* DmitriyMX <dimon550@gmail.com>
|
||||
* 2017-04-01
|
||||
*/
|
||||
package kinosearch.kinosearch3.browser;
|
||||
|
||||
/**
 * Minimal text-oriented HTTP fetcher abstraction.
 */
public interface Browser {
    /**
     * Configures the encoding associated with this browser.
     *
     * @param encoding charset name (presumably used to decode fetched content; see the implementation)
     */
    public abstract void setEncoding(String encoding);

    /**
     * Fetches the resource identified by {@code url}.
     *
     * @param url address of the resource to fetch
     * @return the fetched content as text
     */
    public abstract String get(String url);
}
|
||||
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
* DmitriyMX <dimon550@gmail.com>
|
||||
* 2017-04-01
|
||||
*/
|
||||
package kinosearch.kinosearch3.cinema.onlinelife;
|
||||
|
||||
import kinosearch.kinosearch3.browser.Browser;
|
||||
import kinosearch.kinosearch3.spider.FileDownloader;
|
||||
import kinosearch.kinosearch3.spider.ScannerCinema;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
public class ScannerImpl implements ScannerCinema {
|
||||
private Logger logger = LoggerFactory.getLogger(ScannerImpl.class);
|
||||
private FileDownloader fileDownloader;
|
||||
private Browser browser;
|
||||
private File saveToDir;
|
||||
|
||||
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveToDir) {
|
||||
this.fileDownloader = fileDownloader;
|
||||
this.browser = browser;
|
||||
this.saveToDir = saveToDir;
|
||||
if (!this.saveToDir.mkdirs() && !this.saveToDir.exists()) {
|
||||
throw new IllegalStateException(String.format("dir not found: '%s'", this.saveToDir.getAbsolutePath()));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
int lastPage = getLastPage();
|
||||
|
||||
for (int page = lastPage; page > 0; page--) {
|
||||
logger.info("page #{}...", page);
|
||||
String html = browser.get(String.format("http://www.online-life.cc/filmy/zarubezh-filmy/page/%d/", page));
|
||||
Document document = Jsoup.parse(html);
|
||||
Elements elements = document.getElementsByClass("slider-item");
|
||||
|
||||
elements.stream()
|
||||
.map(element -> element.children().get(0))
|
||||
.forEach(element -> {
|
||||
String str = element.attr("href");
|
||||
str = str.substring(str.lastIndexOf("/") + 1, str.lastIndexOf("."));
|
||||
|
||||
String url = element.children().get(0).children().get(0).attr("src");
|
||||
|
||||
this.fileDownloader.addFile(url, new File(saveToDir, str + ".jpg"));
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
private int getLastPage() {
|
||||
String html = browser.get("http://www.online-life.cc/filmy/zarubezh-filmy/");
|
||||
|
||||
Document document = Jsoup.parse(html);
|
||||
Elements elements = document.getElementsByClass("navigation");
|
||||
elements = elements.get(0).children();
|
||||
Element element = elements.get(elements.size() - 2);
|
||||
|
||||
String[] parts = element.attr("href").split("/");
|
||||
return Integer.parseInt(parts[parts.length-1]);
|
||||
}
|
||||
}
|
||||
116
src/main/java/kinosearch/kinosearch3/spider/FileDownloader.java
Normal file
116
src/main/java/kinosearch/kinosearch3/spider/FileDownloader.java
Normal file
@@ -0,0 +1,116 @@
|
||||
/*
|
||||
* DmitriyMX <dimon550@gmail.com>
|
||||
* 2017-04-01
|
||||
*/
|
||||
package kinosearch.kinosearch3.spider;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.nio.channels.FileLock;
|
||||
import java.util.AbstractMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
|
||||
public class FileDownloader implements Runnable {
|
||||
private Logger logger = LoggerFactory.getLogger(FileDownloader.class);
|
||||
private final BlockingQueue<Map.Entry<String,File>> listOfEntries;
|
||||
private final int threadCount;
|
||||
private ThreadGroup threadGroup;
|
||||
|
||||
public FileDownloader(int threadCount, int capacity) {
|
||||
this.threadCount = threadCount;
|
||||
this.listOfEntries = new ArrayBlockingQueue<>(capacity, true);
|
||||
}
|
||||
|
||||
public void addFile(String url, File saveTo) {
|
||||
try {
|
||||
listOfEntries.put(new AbstractMap.SimpleEntry<>(url, saveTo));
|
||||
} catch (InterruptedException ignore) {
|
||||
if(logger.isTraceEnabled()) {
|
||||
logger.trace("cancel add file to queue");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void start() {
|
||||
this.threadGroup = new ThreadGroup("FileDownloader");
|
||||
for (int i = 1; i <= this.threadCount; i++) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Start '{}' thread", "Downloader #"+i);
|
||||
}
|
||||
(new Thread(threadGroup, this, "Downloader #"+i)).start();
|
||||
}
|
||||
}
|
||||
|
||||
public void stop() {
|
||||
if (logger.isTraceEnabled()) {
|
||||
logger.trace("Stopped threads");
|
||||
}
|
||||
this.threadGroup.interrupt();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
while (!Thread.currentThread().isInterrupted()) {
|
||||
Map.Entry<String, File> entry;
|
||||
try {
|
||||
entry = listOfEntries.take();
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("take url for file '{}'", entry.getValue().getName());
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
if (logger.isTraceEnabled()) {
|
||||
logger.trace("cancel take file by queue");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
FileLock fileLock = null;
|
||||
URLConnection urlConnection = null;
|
||||
try {
|
||||
FileOutputStream fos = new FileOutputStream(entry.getValue());
|
||||
fileLock = fos.getChannel().lock();
|
||||
|
||||
URL url = new URL(entry.getKey());
|
||||
urlConnection = url.openConnection();
|
||||
|
||||
IOUtils.copy(urlConnection.getInputStream(), fos);
|
||||
} catch (IOException e) {
|
||||
logger.error("", e);
|
||||
} finally {
|
||||
if (fileLock != null) {
|
||||
try {
|
||||
fileLock.release();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
if (urlConnection != null) {
|
||||
try {
|
||||
urlConnection.getInputStream().close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.trace("file downloaded: '{}'", entry.getValue().getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (logger.isTraceEnabled()) {
|
||||
logger.debug("Thred closed");
|
||||
}
|
||||
}
|
||||
}
|
||||
16
src/main/java/kinosearch/kinosearch3/spider/Main.java
Normal file
16
src/main/java/kinosearch/kinosearch3/spider/Main.java
Normal file
@@ -0,0 +1,16 @@
|
||||
/*
|
||||
* DmitriyMX <dimon550@gmail.com>
|
||||
* 2017-04-01
|
||||
*/
|
||||
package kinosearch.kinosearch3.spider;
|
||||
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.context.support.ClassPathXmlApplicationContext;
|
||||
|
||||
public class Main {
|
||||
public static void main(String[] args) {
|
||||
ApplicationContext ctx = new ClassPathXmlApplicationContext("/kinosearch/kinosearch3/spider/spring.xml");
|
||||
Spider spider = ctx.getBean("spider", Spider.class);
|
||||
spider.start();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
/*
|
||||
* DmitriyMX <dimon550@gmail.com>
|
||||
* 2017-04-01
|
||||
*/
|
||||
package kinosearch.kinosearch3.spider;
|
||||
|
||||
/**
 * A single scanning job executed by the spider.
 */
public interface ScannerCinema {
    /**
     * Performs the scan.
     */
    public abstract void run();
}
|
||||
23
src/main/java/kinosearch/kinosearch3/spider/Spider.java
Normal file
23
src/main/java/kinosearch/kinosearch3/spider/Spider.java
Normal file
@@ -0,0 +1,23 @@
|
||||
/*
|
||||
* DmitriyMX <dimon550@gmail.com>
|
||||
* 2017-04-02
|
||||
*/
|
||||
package kinosearch.kinosearch3.spider;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
public class Spider {
|
||||
private List<ScannerCinema> scanners;
|
||||
private FileDownloader fileDownloader;
|
||||
|
||||
public Spider(List<ScannerCinema> scanners, FileDownloader fileDownloader) {
|
||||
this.scanners = Collections.unmodifiableList(scanners);
|
||||
this.fileDownloader = fileDownloader;
|
||||
}
|
||||
|
||||
void start() {
|
||||
fileDownloader.start();
|
||||
scanners.forEach(ScannerCinema::run);
|
||||
}
|
||||
}
|
||||
34
src/main/resources/kinosearch/kinosearch3/spider/spring.xml
Normal file
34
src/main/resources/kinosearch/kinosearch3/spider/spring.xml
Normal file
@@ -0,0 +1,34 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Bean wiring for the poster spider.
    The versionless XSD is used so the schema always resolves to the one bundled
    with the Spring version on the classpath.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.springframework.org/schema/beans
                           http://www.springframework.org/schema/beans/spring-beans.xsd">

    <!-- Shared download queue: 3 worker threads, at most 100 pending jobs. -->
    <bean id="fileDownloader" class="kinosearch.kinosearch3.spider.FileDownloader">
        <constructor-arg name="threadCount" value="3"/>
        <constructor-arg name="capacity" value="100"/>
    </bean>

    <!-- Browser template (new instance per injection); scanners override the encoding as needed. -->
    <bean id="browser" class="kinosearch.kinosearch3.browser.ApacheBrowser" scope="prototype">
        <property name="encoding" value="utf-8"/>
    </bean>

    <!-- online-life scanner: uses its own browser with windows-1251 decoding. -->
    <bean id="onlinelifeScanner" class="kinosearch.kinosearch3.cinema.onlinelife.ScannerImpl">
        <constructor-arg name="fileDownloader" ref="fileDownloader"/>
        <constructor-arg name="browser">
            <bean parent="browser">
                <property name="encoding" value="windows-1251"/>
            </bean>
        </constructor-arg>
        <constructor-arg name="saveToDir" value="file:R:/onlinelife"/>
    </bean>

    <!-- Entry bean looked up by Main: runs every listed scanner against the shared downloader. -->
    <bean id="spider" class="kinosearch.kinosearch3.spider.Spider">
        <constructor-arg name="scanners">
            <list>
                <ref bean="onlinelifeScanner"/>
            </list>
        </constructor-arg>
        <constructor-arg name="fileDownloader" ref="fileDownloader"/>
    </bean>
</beans>
|
||||
Reference in New Issue
Block a user