Перепроектируем классы для возможности тестирования
This commit is contained in:
@@ -5,6 +5,7 @@
|
||||
package kinosearch.kinosearch3.cinema.onlinelife;
|
||||
|
||||
import kinosearch.kinosearch3.browser.Browser;
|
||||
import kinosearch.kinosearch3.spider.BaseRepository;
|
||||
import kinosearch.kinosearch3.spider.CinemaDocument;
|
||||
import kinosearch.kinosearch3.spider.FileDownloader;
|
||||
import kinosearch.kinosearch3.spider.ScannerCinema;
|
||||
@@ -12,27 +13,23 @@ import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.data.mongodb.core.MongoTemplate;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
public class ScannerImpl implements ScannerCinema {
|
||||
private Logger logger = LoggerFactory.getLogger(ScannerImpl.class);
|
||||
private FileDownloader fileDownloader;
|
||||
private Browser browser;
|
||||
private File saveToDir;
|
||||
private MongoTemplate mongoTemplate;
|
||||
private BaseRepository repository;
|
||||
|
||||
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveToDir, MongoTemplate mongoTemplate) {
|
||||
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveToDir, BaseRepository repository) {
|
||||
this.fileDownloader = fileDownloader;
|
||||
this.browser = browser;
|
||||
this.saveToDir = saveToDir;
|
||||
if (!this.saveToDir.mkdirs() && !this.saveToDir.exists()) {
|
||||
throw new IllegalStateException(String.format("dir not found: '%s'", this.saveToDir.getAbsolutePath()));
|
||||
}
|
||||
this.mongoTemplate = mongoTemplate;
|
||||
this.repository = repository;
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -45,30 +42,32 @@ public class ScannerImpl implements ScannerCinema {
|
||||
int lastPage = getLastPage();
|
||||
|
||||
for (int page = lastPage; page > 0; page--) {
|
||||
logger.info("page #{}...", page);
|
||||
String html = browser.get(String.format("http://www.online-life.cc/filmy/zarubezh-filmy/page/%d/", page));
|
||||
String html = browser.get(String.format("http://www.online-life.cc/lastnews/page/%d/", page));
|
||||
Document document = Jsoup.parse(html);
|
||||
Elements elements = document.getElementsByClass("slider-item");
|
||||
|
||||
elements.stream()
|
||||
.map(element -> element.children().get(0))
|
||||
.forEach(element -> {
|
||||
String str = element.attr("href");
|
||||
str = str.substring(str.lastIndexOf("/") + 1, str.lastIndexOf("."));
|
||||
|
||||
String url = element.children().get(0).children().get(0).attr("src");
|
||||
|
||||
File saveToFile = new File(saveToDir, str + ".jpg");
|
||||
this.fileDownloader.addFile(url, saveToFile);
|
||||
|
||||
CinemaDocument cinemaDocument = new CinemaDocument(saveToFile.getAbsolutePath(), this.getName());
|
||||
this.mongoTemplate.save(cinemaDocument);
|
||||
});
|
||||
.flatMap(element -> element.children().stream())
|
||||
.filter(element -> element.tagName().equals("a"))
|
||||
.map(element -> element.attr("href"))
|
||||
.forEach(this::browseAndSave);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void browseAndSave(String url) {
|
||||
Document document = Jsoup.parse(browser.get(url));
|
||||
Element element = document.getElementsByClass("full-poster").get(0);
|
||||
|
||||
File saveTo = new File(this.saveToDir, url.substring(url.lastIndexOf("/")+1, url.lastIndexOf("."))+".jpg");
|
||||
this.fileDownloader.addFile(element.attr("src"), saveTo);
|
||||
|
||||
CinemaDocument cinemaDocument = new CinemaDocument(saveTo.getAbsolutePath(), this.getName());
|
||||
repository.save(cinemaDocument);
|
||||
}
|
||||
|
||||
private int getLastPage() {
|
||||
String html = browser.get("http://www.online-life.cc/filmy/zarubezh-filmy/");
|
||||
String html = browser.get("http://www.online-life.cc/lastnews/");
|
||||
|
||||
Document document = Jsoup.parse(html);
|
||||
Elements elements = document.getElementsByClass("navigation");
|
||||
|
||||
@@ -5,34 +5,32 @@
|
||||
package kinosearch.kinosearch3.cinema.seasonvar;
|
||||
|
||||
import kinosearch.kinosearch3.browser.Browser;
|
||||
import kinosearch.kinosearch3.spider.BaseRepository;
|
||||
import kinosearch.kinosearch3.spider.CinemaDocument;
|
||||
import kinosearch.kinosearch3.spider.FileDownloader;
|
||||
import kinosearch.kinosearch3.spider.ScannerCinema;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.data.mongodb.core.MongoTemplate;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
public class ScannerImpl implements ScannerCinema {
|
||||
private Logger logger = LoggerFactory.getLogger(ScannerImpl.class);
|
||||
private static final String DOMAIN = "http://seasonvar.ru/";
|
||||
private final FileDownloader fileDownloader;
|
||||
private final Browser browser;
|
||||
private final File saveTo;
|
||||
private int i = 1;
|
||||
private MongoTemplate mongoTemplate;
|
||||
private final BaseRepository repository;
|
||||
|
||||
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveTo, MongoTemplate mongoTemplate) {
|
||||
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveTo, BaseRepository repository) {
|
||||
this.fileDownloader = fileDownloader;
|
||||
this.browser = browser;
|
||||
this.saveTo = saveTo;
|
||||
if (!this.saveTo.mkdirs() && !this.saveTo.exists()) {
|
||||
throw new IllegalStateException(String.format("dir not found: '%s'", this.saveTo.getAbsolutePath()));
|
||||
}
|
||||
this.mongoTemplate = mongoTemplate;
|
||||
this.repository = repository;
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -42,28 +40,25 @@ public class ScannerImpl implements ScannerCinema {
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
String html = browser.get("http://seasonvar.ru/");
|
||||
String html = browser.get(DOMAIN);
|
||||
Document document = Jsoup.parse(html);
|
||||
Elements elements = document.getElementsByClass("betterT");
|
||||
Elements elements = document.getElementsByAttribute("data-tabgr");
|
||||
|
||||
elements.stream()
|
||||
.filter(element -> element.tagName().equals("div"))
|
||||
.map(element -> element.children().get(0))
|
||||
.forEach(element -> {
|
||||
logger.info("element #{}", i++);
|
||||
String str = element.attr("href");
|
||||
str = str.substring(1, str.lastIndexOf("."));
|
||||
.flatMap(element -> element.children().stream())
|
||||
.map(element -> element.attr("href"))
|
||||
.forEach(url -> this.browseAndSave(DOMAIN + url));
|
||||
}
|
||||
|
||||
String str2 = element.attr("data");
|
||||
str2 = browser.get("http://seasonvar.ru"+str2);
|
||||
Document doc = Jsoup.parse(str2);
|
||||
str2 = doc.getElementsByTag("img").get(0).attr("src");
|
||||
@Override
|
||||
public void browseAndSave(String url) {
|
||||
Document document = Jsoup.parse(browser.get(url));
|
||||
Element element = document.getElementsByAttributeValue("itemprop", "thumbnailUrl").get(0);
|
||||
|
||||
File saveToFile = new File(this.saveTo, str+".jpg");
|
||||
this.fileDownloader.addFile(str2, saveToFile);
|
||||
File saveTo = new File(this.saveTo, url.substring(url.indexOf("/")+1, url.lastIndexOf("."))+".jpg");
|
||||
this.fileDownloader.addFile(element.attr("src"), saveTo);
|
||||
|
||||
CinemaDocument cinemaDocument = new CinemaDocument(saveToFile.getAbsolutePath(), this.getName());
|
||||
this.mongoTemplate.save(cinemaDocument);
|
||||
});
|
||||
CinemaDocument cinemaDocument = new CinemaDocument(saveTo.getAbsolutePath(), this.getName());
|
||||
this.repository.save(cinemaDocument);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
/*
|
||||
* DmitriyMX <dimon550@gmail.com>
|
||||
* 2017-04-06
|
||||
*/
|
||||
package kinosearch.kinosearch3.spider;
|
||||
|
||||
public interface BaseRepository {
|
||||
void save(CinemaDocument cinemaDocument);
|
||||
}
|
||||
@@ -1,116 +1,13 @@
|
||||
/*
|
||||
* DmitriyMX <dimon550@gmail.com>
|
||||
* 2017-04-01
|
||||
* 2017-04-06
|
||||
*/
|
||||
package kinosearch.kinosearch3.spider;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.nio.channels.FileLock;
|
||||
import java.util.AbstractMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
|
||||
public class FileDownloader implements Runnable {
|
||||
private Logger logger = LoggerFactory.getLogger(FileDownloader.class);
|
||||
private final BlockingQueue<Map.Entry<String,File>> listOfEntries;
|
||||
private final int threadCount;
|
||||
private ThreadGroup threadGroup;
|
||||
|
||||
public FileDownloader(int threadCount, int capacity) {
|
||||
this.threadCount = threadCount;
|
||||
this.listOfEntries = new ArrayBlockingQueue<>(capacity, true);
|
||||
}
|
||||
|
||||
public void addFile(String url, File saveTo) {
|
||||
try {
|
||||
listOfEntries.put(new AbstractMap.SimpleEntry<>(url, saveTo));
|
||||
} catch (InterruptedException ignore) {
|
||||
if(logger.isTraceEnabled()) {
|
||||
logger.trace("cancel add file to queue");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void start() {
|
||||
this.threadGroup = new ThreadGroup("FileDownloader");
|
||||
for (int i = 1; i <= this.threadCount; i++) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Start '{}' thread", "Downloader #"+i);
|
||||
}
|
||||
(new Thread(threadGroup, this, "Downloader #"+i)).start();
|
||||
}
|
||||
}
|
||||
|
||||
public void stop() {
|
||||
if (logger.isTraceEnabled()) {
|
||||
logger.trace("Stopped threads");
|
||||
}
|
||||
this.threadGroup.interrupt();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
while (!Thread.currentThread().isInterrupted()) {
|
||||
Map.Entry<String, File> entry;
|
||||
try {
|
||||
entry = listOfEntries.take();
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("take url for file '{}'", entry.getValue().getName());
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
if (logger.isTraceEnabled()) {
|
||||
logger.trace("cancel take file by queue");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
FileLock fileLock = null;
|
||||
URLConnection urlConnection = null;
|
||||
try {
|
||||
FileOutputStream fos = new FileOutputStream(entry.getValue());
|
||||
fileLock = fos.getChannel().lock();
|
||||
|
||||
URL url = new URL(entry.getKey());
|
||||
urlConnection = url.openConnection();
|
||||
|
||||
IOUtils.copy(urlConnection.getInputStream(), fos);
|
||||
} catch (IOException e) {
|
||||
logger.error("", e);
|
||||
} finally {
|
||||
if (fileLock != null) {
|
||||
try {
|
||||
fileLock.release();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
if (urlConnection != null) {
|
||||
try {
|
||||
urlConnection.getInputStream().close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.trace("file downloaded: '{}'", entry.getValue().getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (logger.isTraceEnabled()) {
|
||||
logger.debug("Thred closed");
|
||||
}
|
||||
}
|
||||
/**
 * Contract for a component that accepts download requests and can be started and stopped.
 */
public interface FileDownloader {

    /**
     * Registers a file to be downloaded.
     *
     * @param url    address of the remote resource
     * @param saveTo local file the downloaded content should be written to
     */
    void addFile(String url, File saveTo);

    /** Starts processing of the registered download requests. */
    void start();

    /** Stops processing of download requests. */
    void stop();
}
|
||||
|
||||
@@ -0,0 +1,119 @@
|
||||
/*
|
||||
* DmitriyMX <dimon550@gmail.com>
|
||||
* 2017-04-01
|
||||
*/
|
||||
package kinosearch.kinosearch3.spider;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.nio.channels.FileLock;
|
||||
import java.util.AbstractMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
|
||||
public class FileDownloaderImpl implements FileDownloader, Runnable {
|
||||
private Logger logger = LoggerFactory.getLogger(FileDownloaderImpl.class);
|
||||
private final BlockingQueue<Map.Entry<String,File>> listOfEntries;
|
||||
private final int threadCount;
|
||||
private ThreadGroup threadGroup;
|
||||
|
||||
public FileDownloaderImpl(int threadCount, int capacity) {
|
||||
this.threadCount = threadCount;
|
||||
this.listOfEntries = new ArrayBlockingQueue<>(capacity, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addFile(String url, File saveTo) {
|
||||
try {
|
||||
listOfEntries.put(new AbstractMap.SimpleEntry<>(url, saveTo));
|
||||
} catch (InterruptedException ignore) {
|
||||
if(logger.isTraceEnabled()) {
|
||||
logger.trace("cancel add file to queue");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start() {
|
||||
this.threadGroup = new ThreadGroup("FileDownloader");
|
||||
for (int i = 1; i <= this.threadCount; i++) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Start '{}' thread", "Downloader #"+i);
|
||||
}
|
||||
(new Thread(threadGroup, this, "Downloader #"+i)).start();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stop() {
|
||||
if (logger.isTraceEnabled()) {
|
||||
logger.trace("Stopped threads");
|
||||
}
|
||||
this.threadGroup.interrupt();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
while (!Thread.currentThread().isInterrupted()) {
|
||||
Map.Entry<String, File> entry;
|
||||
try {
|
||||
entry = listOfEntries.take();
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("take url for file '{}'", entry.getValue().getName());
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
if (logger.isTraceEnabled()) {
|
||||
logger.trace("cancel take file by queue");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
FileLock fileLock = null;
|
||||
URLConnection urlConnection = null;
|
||||
try {
|
||||
FileOutputStream fos = new FileOutputStream(entry.getValue());
|
||||
fileLock = fos.getChannel().lock();
|
||||
|
||||
URL url = new URL(entry.getKey());
|
||||
urlConnection = url.openConnection();
|
||||
|
||||
IOUtils.copy(urlConnection.getInputStream(), fos);
|
||||
} catch (IOException e) {
|
||||
logger.error("", e);
|
||||
} finally {
|
||||
if (fileLock != null) {
|
||||
try {
|
||||
fileLock.release();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
if (urlConnection != null) {
|
||||
try {
|
||||
urlConnection.getInputStream().close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.trace("file downloaded: '{}'", entry.getValue().getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (logger.isTraceEnabled()) {
|
||||
logger.debug("Thred closed");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
/*
|
||||
* DmitriyMX <dimon550@gmail.com>
|
||||
* 2017-04-06
|
||||
*/
|
||||
package kinosearch.kinosearch3.spider;
|
||||
|
||||
import org.springframework.data.mongodb.core.MongoOperations;
|
||||
|
||||
public class MongoDBRepository implements BaseRepository {
|
||||
private final MongoOperations mongoOperations;
|
||||
|
||||
public MongoDBRepository(MongoOperations mongoOperations) {
|
||||
this.mongoOperations = mongoOperations;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void save(CinemaDocument cinemaDocument) {
|
||||
mongoOperations.save(cinemaDocument);
|
||||
}
|
||||
}
|
||||
@@ -7,4 +7,5 @@ package kinosearch.kinosearch3.spider;
|
||||
/**
 * A scanner for one cinema web site.
 */
public interface ScannerCinema {

    /**
     * Returns the name of this scanner.
     *
     * @return the scanner name
     */
    String getName();

    /** Runs the scan. */
    void run();

    /**
     * Processes a single page and stores the result.
     *
     * @param url address of the page to process
     */
    void browseAndSave(String url);
}
|
||||
|
||||
Reference in New Issue
Block a user