Перепроектируем классы для возможности тестирования
This commit is contained in:
44
pom.xml
44
pom.xml
@@ -20,7 +20,7 @@
|
|||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
<java.version>1.8</java.version>
|
<java.version>1.8</java.version>
|
||||||
<slf4j.version>1.7.21</slf4j.version>
|
<slf4j.version>1.7.21</slf4j.version>
|
||||||
<spring.version>4.2.5.RELEASE</spring.version>
|
<spring.version>4.3.7.RELEASE</spring.version>
|
||||||
<spring.mongodb.version>1.10.1.RELEASE</spring.mongodb.version>
|
<spring.mongodb.version>1.10.1.RELEASE</spring.mongodb.version>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
@@ -64,6 +64,16 @@
|
|||||||
<groupId>org.springframework.data</groupId>
|
<groupId>org.springframework.data</groupId>
|
||||||
<artifactId>spring-data-mongodb</artifactId>
|
<artifactId>spring-data-mongodb</artifactId>
|
||||||
<version>${spring.mongodb.version}</version>
|
<version>${spring.mongodb.version}</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-api</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>jcl-over-slf4j</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- COMPONENTS -->
|
<!-- COMPONENTS -->
|
||||||
@@ -76,6 +86,12 @@
|
|||||||
<groupId>org.apache.httpcomponents</groupId>
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
<artifactId>httpclient</artifactId>
|
<artifactId>httpclient</artifactId>
|
||||||
<version>4.5.2</version>
|
<version>4.5.2</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>commons-logging</groupId>
|
||||||
|
<artifactId>commons-logging</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.jsoup</groupId>
|
<groupId>org.jsoup</groupId>
|
||||||
@@ -87,6 +103,32 @@
|
|||||||
<artifactId>lombok</artifactId>
|
<artifactId>lombok</artifactId>
|
||||||
<version>1.16.16</version>
|
<version>1.16.16</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- TESTS -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>junit</groupId>
|
||||||
|
<artifactId>junit</artifactId>
|
||||||
|
<version>4.12</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.springframework</groupId>
|
||||||
|
<artifactId>spring-test</artifactId>
|
||||||
|
<version>${spring.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>de.flapdoodle.embed</groupId>
|
||||||
|
<artifactId>de.flapdoodle.embed.mongo</artifactId>
|
||||||
|
<version>2.0.0</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-api</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
package kinosearch.kinosearch3.cinema.onlinelife;
|
package kinosearch.kinosearch3.cinema.onlinelife;
|
||||||
|
|
||||||
import kinosearch.kinosearch3.browser.Browser;
|
import kinosearch.kinosearch3.browser.Browser;
|
||||||
|
import kinosearch.kinosearch3.spider.BaseRepository;
|
||||||
import kinosearch.kinosearch3.spider.CinemaDocument;
|
import kinosearch.kinosearch3.spider.CinemaDocument;
|
||||||
import kinosearch.kinosearch3.spider.FileDownloader;
|
import kinosearch.kinosearch3.spider.FileDownloader;
|
||||||
import kinosearch.kinosearch3.spider.ScannerCinema;
|
import kinosearch.kinosearch3.spider.ScannerCinema;
|
||||||
@@ -12,27 +13,23 @@ import org.jsoup.Jsoup;
|
|||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
import org.springframework.data.mongodb.core.MongoTemplate;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
public class ScannerImpl implements ScannerCinema {
|
public class ScannerImpl implements ScannerCinema {
|
||||||
private Logger logger = LoggerFactory.getLogger(ScannerImpl.class);
|
|
||||||
private FileDownloader fileDownloader;
|
private FileDownloader fileDownloader;
|
||||||
private Browser browser;
|
private Browser browser;
|
||||||
private File saveToDir;
|
private File saveToDir;
|
||||||
private MongoTemplate mongoTemplate;
|
private BaseRepository repository;
|
||||||
|
|
||||||
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveToDir, MongoTemplate mongoTemplate) {
|
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveToDir, BaseRepository repository) {
|
||||||
this.fileDownloader = fileDownloader;
|
this.fileDownloader = fileDownloader;
|
||||||
this.browser = browser;
|
this.browser = browser;
|
||||||
this.saveToDir = saveToDir;
|
this.saveToDir = saveToDir;
|
||||||
if (!this.saveToDir.mkdirs() && !this.saveToDir.exists()) {
|
if (!this.saveToDir.mkdirs() && !this.saveToDir.exists()) {
|
||||||
throw new IllegalStateException(String.format("dir not found: '%s'", this.saveToDir.getAbsolutePath()));
|
throw new IllegalStateException(String.format("dir not found: '%s'", this.saveToDir.getAbsolutePath()));
|
||||||
}
|
}
|
||||||
this.mongoTemplate = mongoTemplate;
|
this.repository = repository;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -45,30 +42,32 @@ public class ScannerImpl implements ScannerCinema {
|
|||||||
int lastPage = getLastPage();
|
int lastPage = getLastPage();
|
||||||
|
|
||||||
for (int page = lastPage; page > 0; page--) {
|
for (int page = lastPage; page > 0; page--) {
|
||||||
logger.info("page #{}...", page);
|
String html = browser.get(String.format("http://www.online-life.cc/lastnews/page/%d/", page));
|
||||||
String html = browser.get(String.format("http://www.online-life.cc/filmy/zarubezh-filmy/page/%d/", page));
|
|
||||||
Document document = Jsoup.parse(html);
|
Document document = Jsoup.parse(html);
|
||||||
Elements elements = document.getElementsByClass("slider-item");
|
Elements elements = document.getElementsByClass("slider-item");
|
||||||
|
|
||||||
elements.stream()
|
elements.stream()
|
||||||
.map(element -> element.children().get(0))
|
.flatMap(element -> element.children().stream())
|
||||||
.forEach(element -> {
|
.filter(element -> element.tagName().equals("a"))
|
||||||
String str = element.attr("href");
|
.map(element -> element.attr("href"))
|
||||||
str = str.substring(str.lastIndexOf("/") + 1, str.lastIndexOf("."));
|
.forEach(this::browseAndSave);
|
||||||
|
|
||||||
String url = element.children().get(0).children().get(0).attr("src");
|
|
||||||
|
|
||||||
File saveToFile = new File(saveToDir, str + ".jpg");
|
|
||||||
this.fileDownloader.addFile(url, saveToFile);
|
|
||||||
|
|
||||||
CinemaDocument cinemaDocument = new CinemaDocument(saveToFile.getAbsolutePath(), this.getName());
|
|
||||||
this.mongoTemplate.save(cinemaDocument);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void browseAndSave(String url) {
|
||||||
|
Document document = Jsoup.parse(browser.get(url));
|
||||||
|
Element element = document.getElementsByClass("full-poster").get(0);
|
||||||
|
|
||||||
|
File saveTo = new File(this.saveToDir, url.substring(url.lastIndexOf("/")+1, url.lastIndexOf("."))+".jpg");
|
||||||
|
this.fileDownloader.addFile(element.attr("src"), saveTo);
|
||||||
|
|
||||||
|
CinemaDocument cinemaDocument = new CinemaDocument(saveTo.getAbsolutePath(), this.getName());
|
||||||
|
repository.save(cinemaDocument);
|
||||||
|
}
|
||||||
|
|
||||||
private int getLastPage() {
|
private int getLastPage() {
|
||||||
String html = browser.get("http://www.online-life.cc/filmy/zarubezh-filmy/");
|
String html = browser.get("http://www.online-life.cc/lastnews/");
|
||||||
|
|
||||||
Document document = Jsoup.parse(html);
|
Document document = Jsoup.parse(html);
|
||||||
Elements elements = document.getElementsByClass("navigation");
|
Elements elements = document.getElementsByClass("navigation");
|
||||||
|
|||||||
@@ -5,34 +5,32 @@
|
|||||||
package kinosearch.kinosearch3.cinema.seasonvar;
|
package kinosearch.kinosearch3.cinema.seasonvar;
|
||||||
|
|
||||||
import kinosearch.kinosearch3.browser.Browser;
|
import kinosearch.kinosearch3.browser.Browser;
|
||||||
|
import kinosearch.kinosearch3.spider.BaseRepository;
|
||||||
import kinosearch.kinosearch3.spider.CinemaDocument;
|
import kinosearch.kinosearch3.spider.CinemaDocument;
|
||||||
import kinosearch.kinosearch3.spider.FileDownloader;
|
import kinosearch.kinosearch3.spider.FileDownloader;
|
||||||
import kinosearch.kinosearch3.spider.ScannerCinema;
|
import kinosearch.kinosearch3.spider.ScannerCinema;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
import org.springframework.data.mongodb.core.MongoTemplate;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
public class ScannerImpl implements ScannerCinema {
|
public class ScannerImpl implements ScannerCinema {
|
||||||
private Logger logger = LoggerFactory.getLogger(ScannerImpl.class);
|
private static final String DOMAIN = "http://seasonvar.ru/";
|
||||||
private final FileDownloader fileDownloader;
|
private final FileDownloader fileDownloader;
|
||||||
private final Browser browser;
|
private final Browser browser;
|
||||||
private final File saveTo;
|
private final File saveTo;
|
||||||
private int i = 1;
|
private final BaseRepository repository;
|
||||||
private MongoTemplate mongoTemplate;
|
|
||||||
|
|
||||||
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveTo, MongoTemplate mongoTemplate) {
|
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveTo, BaseRepository repository) {
|
||||||
this.fileDownloader = fileDownloader;
|
this.fileDownloader = fileDownloader;
|
||||||
this.browser = browser;
|
this.browser = browser;
|
||||||
this.saveTo = saveTo;
|
this.saveTo = saveTo;
|
||||||
if (!this.saveTo.mkdirs() && !this.saveTo.exists()) {
|
if (!this.saveTo.mkdirs() && !this.saveTo.exists()) {
|
||||||
throw new IllegalStateException(String.format("dir not found: '%s'", this.saveTo.getAbsolutePath()));
|
throw new IllegalStateException(String.format("dir not found: '%s'", this.saveTo.getAbsolutePath()));
|
||||||
}
|
}
|
||||||
this.mongoTemplate = mongoTemplate;
|
this.repository = repository;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -42,28 +40,25 @@ public class ScannerImpl implements ScannerCinema {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
String html = browser.get("http://seasonvar.ru/");
|
String html = browser.get(DOMAIN);
|
||||||
Document document = Jsoup.parse(html);
|
Document document = Jsoup.parse(html);
|
||||||
Elements elements = document.getElementsByClass("betterT");
|
Elements elements = document.getElementsByAttribute("data-tabgr");
|
||||||
|
|
||||||
elements.stream()
|
elements.stream()
|
||||||
.filter(element -> element.tagName().equals("div"))
|
.flatMap(element -> element.children().stream())
|
||||||
.map(element -> element.children().get(0))
|
.map(element -> element.attr("href"))
|
||||||
.forEach(element -> {
|
.forEach(url -> this.browseAndSave(DOMAIN + url));
|
||||||
logger.info("element #{}", i++);
|
}
|
||||||
String str = element.attr("href");
|
|
||||||
str = str.substring(1, str.lastIndexOf("."));
|
|
||||||
|
|
||||||
String str2 = element.attr("data");
|
@Override
|
||||||
str2 = browser.get("http://seasonvar.ru"+str2);
|
public void browseAndSave(String url) {
|
||||||
Document doc = Jsoup.parse(str2);
|
Document document = Jsoup.parse(browser.get(url));
|
||||||
str2 = doc.getElementsByTag("img").get(0).attr("src");
|
Element element = document.getElementsByAttributeValue("itemprop", "thumbnailUrl").get(0);
|
||||||
|
|
||||||
File saveToFile = new File(this.saveTo, str+".jpg");
|
File saveTo = new File(this.saveTo, url.substring(url.indexOf("/")+1, url.lastIndexOf("."))+".jpg");
|
||||||
this.fileDownloader.addFile(str2, saveToFile);
|
this.fileDownloader.addFile(element.attr("src"), saveTo);
|
||||||
|
|
||||||
CinemaDocument cinemaDocument = new CinemaDocument(saveToFile.getAbsolutePath(), this.getName());
|
CinemaDocument cinemaDocument = new CinemaDocument(saveTo.getAbsolutePath(), this.getName());
|
||||||
this.mongoTemplate.save(cinemaDocument);
|
this.repository.save(cinemaDocument);
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,9 @@
|
|||||||
|
/*
|
||||||
|
* DmitriyMX <dimon550@gmail.com>
|
||||||
|
* 2017-04-06
|
||||||
|
*/
|
||||||
|
package kinosearch.kinosearch3.spider;
|
||||||
|
|
||||||
|
public interface BaseRepository {
|
||||||
|
void save(CinemaDocument cinemaDocument);
|
||||||
|
}
|
||||||
@@ -1,116 +1,13 @@
|
|||||||
/*
|
/*
|
||||||
* DmitriyMX <dimon550@gmail.com>
|
* DmitriyMX <dimon550@gmail.com>
|
||||||
* 2017-04-01
|
* 2017-04-06
|
||||||
*/
|
*/
|
||||||
package kinosearch.kinosearch3.spider;
|
package kinosearch.kinosearch3.spider;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.net.URL;
|
|
||||||
import java.net.URLConnection;
|
|
||||||
import java.nio.channels.FileLock;
|
|
||||||
import java.util.AbstractMap;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.concurrent.ArrayBlockingQueue;
|
|
||||||
import java.util.concurrent.BlockingQueue;
|
|
||||||
|
|
||||||
public class FileDownloader implements Runnable {
|
public interface FileDownloader {
|
||||||
private Logger logger = LoggerFactory.getLogger(FileDownloader.class);
|
void addFile(String url, File saveTo);
|
||||||
private final BlockingQueue<Map.Entry<String,File>> listOfEntries;
|
void start();
|
||||||
private final int threadCount;
|
void stop();
|
||||||
private ThreadGroup threadGroup;
|
|
||||||
|
|
||||||
public FileDownloader(int threadCount, int capacity) {
|
|
||||||
this.threadCount = threadCount;
|
|
||||||
this.listOfEntries = new ArrayBlockingQueue<>(capacity, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void addFile(String url, File saveTo) {
|
|
||||||
try {
|
|
||||||
listOfEntries.put(new AbstractMap.SimpleEntry<>(url, saveTo));
|
|
||||||
} catch (InterruptedException ignore) {
|
|
||||||
if(logger.isTraceEnabled()) {
|
|
||||||
logger.trace("cancel add file to queue");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void start() {
|
|
||||||
this.threadGroup = new ThreadGroup("FileDownloader");
|
|
||||||
for (int i = 1; i <= this.threadCount; i++) {
|
|
||||||
if (logger.isDebugEnabled()) {
|
|
||||||
logger.debug("Start '{}' thread", "Downloader #"+i);
|
|
||||||
}
|
|
||||||
(new Thread(threadGroup, this, "Downloader #"+i)).start();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void stop() {
|
|
||||||
if (logger.isTraceEnabled()) {
|
|
||||||
logger.trace("Stopped threads");
|
|
||||||
}
|
|
||||||
this.threadGroup.interrupt();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void run() {
|
|
||||||
Logger logger = LoggerFactory.getLogger(getClass());
|
|
||||||
|
|
||||||
while (!Thread.currentThread().isInterrupted()) {
|
|
||||||
Map.Entry<String, File> entry;
|
|
||||||
try {
|
|
||||||
entry = listOfEntries.take();
|
|
||||||
if (logger.isDebugEnabled()) {
|
|
||||||
logger.debug("take url for file '{}'", entry.getValue().getName());
|
|
||||||
}
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
if (logger.isTraceEnabled()) {
|
|
||||||
logger.trace("cancel take file by queue");
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
FileLock fileLock = null;
|
|
||||||
URLConnection urlConnection = null;
|
|
||||||
try {
|
|
||||||
FileOutputStream fos = new FileOutputStream(entry.getValue());
|
|
||||||
fileLock = fos.getChannel().lock();
|
|
||||||
|
|
||||||
URL url = new URL(entry.getKey());
|
|
||||||
urlConnection = url.openConnection();
|
|
||||||
|
|
||||||
IOUtils.copy(urlConnection.getInputStream(), fos);
|
|
||||||
} catch (IOException e) {
|
|
||||||
logger.error("", e);
|
|
||||||
} finally {
|
|
||||||
if (fileLock != null) {
|
|
||||||
try {
|
|
||||||
fileLock.release();
|
|
||||||
} catch (IOException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (urlConnection != null) {
|
|
||||||
try {
|
|
||||||
urlConnection.getInputStream().close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (logger.isDebugEnabled()) {
|
|
||||||
logger.trace("file downloaded: '{}'", entry.getValue().getName());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (logger.isTraceEnabled()) {
|
|
||||||
logger.debug("Thred closed");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,119 @@
|
|||||||
|
/*
|
||||||
|
* DmitriyMX <dimon550@gmail.com>
|
||||||
|
* 2017-04-01
|
||||||
|
*/
|
||||||
|
package kinosearch.kinosearch3.spider;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.net.URLConnection;
|
||||||
|
import java.nio.channels.FileLock;
|
||||||
|
import java.util.AbstractMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.ArrayBlockingQueue;
|
||||||
|
import java.util.concurrent.BlockingQueue;
|
||||||
|
|
||||||
|
public class FileDownloaderImpl implements FileDownloader, Runnable {
|
||||||
|
private Logger logger = LoggerFactory.getLogger(FileDownloaderImpl.class);
|
||||||
|
private final BlockingQueue<Map.Entry<String,File>> listOfEntries;
|
||||||
|
private final int threadCount;
|
||||||
|
private ThreadGroup threadGroup;
|
||||||
|
|
||||||
|
public FileDownloaderImpl(int threadCount, int capacity) {
|
||||||
|
this.threadCount = threadCount;
|
||||||
|
this.listOfEntries = new ArrayBlockingQueue<>(capacity, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void addFile(String url, File saveTo) {
|
||||||
|
try {
|
||||||
|
listOfEntries.put(new AbstractMap.SimpleEntry<>(url, saveTo));
|
||||||
|
} catch (InterruptedException ignore) {
|
||||||
|
if(logger.isTraceEnabled()) {
|
||||||
|
logger.trace("cancel add file to queue");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void start() {
|
||||||
|
this.threadGroup = new ThreadGroup("FileDownloader");
|
||||||
|
for (int i = 1; i <= this.threadCount; i++) {
|
||||||
|
if (logger.isDebugEnabled()) {
|
||||||
|
logger.debug("Start '{}' thread", "Downloader #"+i);
|
||||||
|
}
|
||||||
|
(new Thread(threadGroup, this, "Downloader #"+i)).start();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void stop() {
|
||||||
|
if (logger.isTraceEnabled()) {
|
||||||
|
logger.trace("Stopped threads");
|
||||||
|
}
|
||||||
|
this.threadGroup.interrupt();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
|
while (!Thread.currentThread().isInterrupted()) {
|
||||||
|
Map.Entry<String, File> entry;
|
||||||
|
try {
|
||||||
|
entry = listOfEntries.take();
|
||||||
|
if (logger.isDebugEnabled()) {
|
||||||
|
logger.debug("take url for file '{}'", entry.getValue().getName());
|
||||||
|
}
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
if (logger.isTraceEnabled()) {
|
||||||
|
logger.trace("cancel take file by queue");
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
FileLock fileLock = null;
|
||||||
|
URLConnection urlConnection = null;
|
||||||
|
try {
|
||||||
|
FileOutputStream fos = new FileOutputStream(entry.getValue());
|
||||||
|
fileLock = fos.getChannel().lock();
|
||||||
|
|
||||||
|
URL url = new URL(entry.getKey());
|
||||||
|
urlConnection = url.openConnection();
|
||||||
|
|
||||||
|
IOUtils.copy(urlConnection.getInputStream(), fos);
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.error("", e);
|
||||||
|
} finally {
|
||||||
|
if (fileLock != null) {
|
||||||
|
try {
|
||||||
|
fileLock.release();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (urlConnection != null) {
|
||||||
|
try {
|
||||||
|
urlConnection.getInputStream().close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (logger.isDebugEnabled()) {
|
||||||
|
logger.trace("file downloaded: '{}'", entry.getValue().getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (logger.isTraceEnabled()) {
|
||||||
|
logger.debug("Thred closed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
/*
|
||||||
|
* DmitriyMX <dimon550@gmail.com>
|
||||||
|
* 2017-04-06
|
||||||
|
*/
|
||||||
|
package kinosearch.kinosearch3.spider;
|
||||||
|
|
||||||
|
import org.springframework.data.mongodb.core.MongoOperations;
|
||||||
|
|
||||||
|
public class MongoDBRepository implements BaseRepository {
|
||||||
|
private final MongoOperations mongoOperations;
|
||||||
|
|
||||||
|
public MongoDBRepository(MongoOperations mongoOperations) {
|
||||||
|
this.mongoOperations = mongoOperations;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void save(CinemaDocument cinemaDocument) {
|
||||||
|
mongoOperations.save(cinemaDocument);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -7,4 +7,5 @@ package kinosearch.kinosearch3.spider;
|
|||||||
public interface ScannerCinema {
|
public interface ScannerCinema {
|
||||||
String getName();
|
String getName();
|
||||||
void run();
|
void run();
|
||||||
|
void browseAndSave(String url);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,7 +23,11 @@
|
|||||||
<constructor-arg name="mongoDbFactory" ref="mongoDbFactory"/>
|
<constructor-arg name="mongoDbFactory" ref="mongoDbFactory"/>
|
||||||
</bean>
|
</bean>
|
||||||
|
|
||||||
<bean id="fileDownloader" class="kinosearch.kinosearch3.spider.FileDownloader">
|
<bean id="repository" class="kinosearch.kinosearch3.spider.MongoDBRepository">
|
||||||
|
<constructor-arg name="mongoOperations" ref="mongoTemplate"/>
|
||||||
|
</bean>
|
||||||
|
|
||||||
|
<bean id="fileDownloader" class="kinosearch.kinosearch3.spider.FileDownloaderImpl">
|
||||||
<constructor-arg name="threadCount" value="3"/>
|
<constructor-arg name="threadCount" value="3"/>
|
||||||
<constructor-arg name="capacity" value="100"/>
|
<constructor-arg name="capacity" value="100"/>
|
||||||
</bean>
|
</bean>
|
||||||
@@ -40,14 +44,14 @@
|
|||||||
</bean>
|
</bean>
|
||||||
</constructor-arg>
|
</constructor-arg>
|
||||||
<constructor-arg name="saveToDir" value="file:R:/onlinelife"/>
|
<constructor-arg name="saveToDir" value="file:R:/onlinelife"/>
|
||||||
<constructor-arg name="mongoTemplate" ref="mongoTemplate"/>
|
<constructor-arg name="repository" ref="repository"/>
|
||||||
</bean>
|
</bean>
|
||||||
|
|
||||||
<bean id="seasonvarScanner" class="kinosearch.kinosearch3.cinema.seasonvar.ScannerImpl">
|
<bean id="seasonvarScanner" class="kinosearch.kinosearch3.cinema.seasonvar.ScannerImpl">
|
||||||
<constructor-arg name="fileDownloader" ref="fileDownloader"/>
|
<constructor-arg name="fileDownloader" ref="fileDownloader"/>
|
||||||
<constructor-arg name="browser" ref="browser"/>
|
<constructor-arg name="browser" ref="browser"/>
|
||||||
<constructor-arg name="saveTo" value="file:R:/seasonvar"/>
|
<constructor-arg name="saveTo" value="file:R:/seasonvar"/>
|
||||||
<constructor-arg name="mongoTemplate" ref="mongoTemplate"/>
|
<constructor-arg name="repository" ref="repository"/>
|
||||||
</bean>
|
</bean>
|
||||||
|
|
||||||
<bean id="spider" class="kinosearch.kinosearch3.spider.Spider">
|
<bean id="spider" class="kinosearch.kinosearch3.spider.Spider">
|
||||||
|
|||||||
@@ -0,0 +1,50 @@
|
|||||||
|
/*
|
||||||
|
* DmitriyMX <dimon550@gmail.com>
|
||||||
|
* 2017-04-07
|
||||||
|
*/
|
||||||
|
package kinosearch.kinosearch3.cinema;
|
||||||
|
|
||||||
|
import de.flapdoodle.embed.mongo.Command;
|
||||||
|
import de.flapdoodle.embed.mongo.MongodExecutable;
|
||||||
|
import de.flapdoodle.embed.mongo.MongodProcess;
|
||||||
|
import de.flapdoodle.embed.mongo.MongodStarter;
|
||||||
|
import de.flapdoodle.embed.mongo.config.IMongodConfig;
|
||||||
|
import de.flapdoodle.embed.mongo.config.MongodConfigBuilder;
|
||||||
|
import de.flapdoodle.embed.mongo.config.Net;
|
||||||
|
import de.flapdoodle.embed.mongo.config.RuntimeConfigBuilder;
|
||||||
|
import de.flapdoodle.embed.mongo.distribution.Version;
|
||||||
|
import de.flapdoodle.embed.process.config.IRuntimeConfig;
|
||||||
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public class AbstractScannerTest {
|
||||||
|
private static MongodProcess mongodProcess;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void initBase() throws IOException {
|
||||||
|
IRuntimeConfig runtimeConfig = new RuntimeConfigBuilder()
|
||||||
|
.defaultsWithLogger(Command.MongoD, LoggerFactory.getLogger("Mongod"))
|
||||||
|
.build();
|
||||||
|
MongodStarter starter = MongodStarter.getInstance(runtimeConfig);
|
||||||
|
|
||||||
|
IMongodConfig mongodConfig = new MongodConfigBuilder()
|
||||||
|
.version(Version.V3_0_5)
|
||||||
|
.net(new Net("127.0.0.1",27017, false))
|
||||||
|
.build();
|
||||||
|
MongodExecutable mongodExecutable = starter.prepare(mongodConfig);
|
||||||
|
mongodProcess = mongodExecutable.start();
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public static void shutdownBase() {
|
||||||
|
mongodProcess.stop();
|
||||||
|
try {
|
||||||
|
Thread.sleep(5000L);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
/*
|
||||||
|
* DmitriyMX <dimon550@gmail.com>
|
||||||
|
* 2017-04-06
|
||||||
|
*/
|
||||||
|
package kinosearch.kinosearch3.cinema;
|
||||||
|
|
||||||
|
import kinosearch.kinosearch3.spider.FileDownloader;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
public class FakeFileDownloader implements FileDownloader {
|
||||||
|
@Override
|
||||||
|
public void addFile(String url, File saveTo) {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void start() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void stop() {
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,38 @@
|
|||||||
|
/*
|
||||||
|
* DmitriyMX <dimon550@gmail.com>
|
||||||
|
* 2017-04-07
|
||||||
|
*/
|
||||||
|
package kinosearch.kinosearch3.cinema.onlinelife;
|
||||||
|
|
||||||
|
import kinosearch.kinosearch3.cinema.AbstractScannerTest;
|
||||||
|
import kinosearch.kinosearch3.spider.CinemaDocument;
|
||||||
|
import org.junit.*;
|
||||||
|
import org.junit.runner.RunWith;
|
||||||
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
|
import org.springframework.beans.factory.annotation.Qualifier;
|
||||||
|
import org.springframework.data.mongodb.core.MongoOperations;
|
||||||
|
import org.springframework.test.context.ContextConfiguration;
|
||||||
|
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
|
||||||
|
|
||||||
|
@RunWith(SpringJUnit4ClassRunner.class)
|
||||||
|
@ContextConfiguration("/kinosearch/kinosearch3/spider/test-spring.xml")
|
||||||
|
public class OnlinelifeScannerTest extends AbstractScannerTest {
|
||||||
|
@Autowired
|
||||||
|
@Qualifier("onlinelifeScanner")
|
||||||
|
private ScannerImpl scanner;
|
||||||
|
|
||||||
|
@Autowired
|
||||||
|
@Qualifier("mongoTemplate")
|
||||||
|
private MongoOperations mongoOperations;
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void resetBase() {
|
||||||
|
mongoOperations.dropCollection("cinema");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void browseAndSaveTest() {
|
||||||
|
scanner.browseAndSave("http://www.online-life.cc/76-pol-sekretnyy-materialchik-onlayn.html");
|
||||||
|
Assert.assertEquals(1, mongoOperations.findAll(CinemaDocument.class).size());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,39 @@
|
|||||||
|
/*
|
||||||
|
* DmitriyMX <dimon550@gmail.com>
|
||||||
|
* 2017-04-06
|
||||||
|
*/
|
||||||
|
package kinosearch.kinosearch3.cinema.seasonvar;
|
||||||
|
|
||||||
|
import com.mongodb.CommandResult;
|
||||||
|
import kinosearch.kinosearch3.cinema.AbstractScannerTest;
|
||||||
|
import kinosearch.kinosearch3.spider.CinemaDocument;
|
||||||
|
import org.junit.*;
|
||||||
|
import org.junit.runner.RunWith;
|
||||||
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
|
import org.springframework.beans.factory.annotation.Qualifier;
|
||||||
|
import org.springframework.data.mongodb.core.MongoOperations;
|
||||||
|
import org.springframework.test.context.ContextConfiguration;
|
||||||
|
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
|
||||||
|
|
||||||
|
@RunWith(SpringJUnit4ClassRunner.class)
|
||||||
|
@ContextConfiguration("/kinosearch/kinosearch3/spider/test-spring.xml")
|
||||||
|
public class SeasonvarScannerTest extends AbstractScannerTest {
|
||||||
|
@Autowired
|
||||||
|
@Qualifier("seasonvarScanner")
|
||||||
|
private ScannerImpl scanner;
|
||||||
|
|
||||||
|
@Autowired
|
||||||
|
@Qualifier("mongoTemplate")
|
||||||
|
private MongoOperations mongoOperations;
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void resetBase() {
|
||||||
|
mongoOperations.dropCollection("cinema");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void browseAndSaveTest() {
|
||||||
|
scanner.browseAndSave("http://seasonvar.ru/serial-13451-A_ty_dumal_chto_tvoya_zhena_v_onlajn_igre_na_samom_dele_ne_devushka.html");
|
||||||
|
Assert.assertEquals(1, mongoOperations.findAll(CinemaDocument.class).size());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,40 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
<!-- Spring context used by the scanner tests. -->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:mongo="http://www.springframework.org/schema/data/mongo"
       xsi:schemaLocation="http://www.springframework.org/schema/beans
                           http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
                           http://www.springframework.org/schema/data/mongo
                           http://www.springframework.org/schema/data/mongo/spring-mongo.xsd">

    <!-- Mongo client; host and port match the embedded server started by AbstractScannerTest. -->
    <mongo:mongo id="mongo" host="127.0.0.1" port="27017"/>
    <mongo:db-factory id="mongoDbFactory" dbname="fakeBase" mongo-ref="mongo"/>

    <bean id="mongoTemplate" class="org.springframework.data.mongodb.core.MongoTemplate">
        <constructor-arg name="mongoDbFactory" ref="mongoDbFactory"/>
    </bean>

    <!-- Repository the scanners write through. -->
    <bean id="repository" class="kinosearch.kinosearch3.spider.MongoDBRepository">
        <constructor-arg name="mongoOperations" ref="mongoTemplate"/>
    </bean>

    <!-- No-op downloader: tests must not fetch image files. -->
    <bean id="fileDownloader" class="kinosearch.kinosearch3.cinema.FakeFileDownloader"/>

    <bean id="browser" class="kinosearch.kinosearch3.browser.ApacheBrowser" scope="prototype">
        <property name="encoding" value="utf-8"/>
    </bean>

    <!-- Scanners under test. -->
    <bean id="onlinelifeScanner" class="kinosearch.kinosearch3.cinema.onlinelife.ScannerImpl">
        <constructor-arg name="fileDownloader" ref="fileDownloader"/>
        <constructor-arg name="browser" ref="browser"/>
        <constructor-arg name="saveToDir" value="file:R:/onlinelife"/>
        <constructor-arg name="repository" ref="repository"/>
    </bean>

    <bean id="seasonvarScanner" class="kinosearch.kinosearch3.cinema.seasonvar.ScannerImpl">
        <constructor-arg name="fileDownloader" ref="fileDownloader"/>
        <constructor-arg name="browser" ref="browser"/>
        <constructor-arg name="saveTo" value="file:R:/seasonvar"/>
        <constructor-arg name="repository" ref="repository"/>
    </bean>
</beans>
|
||||||
Reference in New Issue
Block a user