0

Перепроектируем классы для возможности тестирования

This commit is contained in:
2017-04-07 01:33:10 +03:00
parent 82f26515ef
commit 369ba8aa6c
14 changed files with 436 additions and 160 deletions

View File

@@ -5,6 +5,7 @@
package kinosearch.kinosearch3.cinema.onlinelife;
import kinosearch.kinosearch3.browser.Browser;
import kinosearch.kinosearch3.spider.BaseRepository;
import kinosearch.kinosearch3.spider.CinemaDocument;
import kinosearch.kinosearch3.spider.FileDownloader;
import kinosearch.kinosearch3.spider.ScannerCinema;
@@ -12,27 +13,23 @@ import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.data.mongodb.core.MongoTemplate;
import java.io.File;
public class ScannerImpl implements ScannerCinema {
private Logger logger = LoggerFactory.getLogger(ScannerImpl.class);
private FileDownloader fileDownloader;
private Browser browser;
private File saveToDir;
private MongoTemplate mongoTemplate;
private BaseRepository repository;
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveToDir, MongoTemplate mongoTemplate) {
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveToDir, BaseRepository repository) {
this.fileDownloader = fileDownloader;
this.browser = browser;
this.saveToDir = saveToDir;
if (!this.saveToDir.mkdirs() && !this.saveToDir.exists()) {
throw new IllegalStateException(String.format("dir not found: '%s'", this.saveToDir.getAbsolutePath()));
}
this.mongoTemplate = mongoTemplate;
this.repository = repository;
}
@Override
@@ -45,30 +42,32 @@ public class ScannerImpl implements ScannerCinema {
int lastPage = getLastPage();
for (int page = lastPage; page > 0; page--) {
logger.info("page #{}...", page);
String html = browser.get(String.format("http://www.online-life.cc/filmy/zarubezh-filmy/page/%d/", page));
String html = browser.get(String.format("http://www.online-life.cc/lastnews/page/%d/", page));
Document document = Jsoup.parse(html);
Elements elements = document.getElementsByClass("slider-item");
elements.stream()
.map(element -> element.children().get(0))
.forEach(element -> {
String str = element.attr("href");
str = str.substring(str.lastIndexOf("/") + 1, str.lastIndexOf("."));
String url = element.children().get(0).children().get(0).attr("src");
File saveToFile = new File(saveToDir, str + ".jpg");
this.fileDownloader.addFile(url, saveToFile);
CinemaDocument cinemaDocument = new CinemaDocument(saveToFile.getAbsolutePath(), this.getName());
this.mongoTemplate.save(cinemaDocument);
});
.flatMap(element -> element.children().stream())
.filter(element -> element.tagName().equals("a"))
.map(element -> element.attr("href"))
.forEach(this::browseAndSave);
}
}
/**
 * Loads one film page, queues its poster for download and stores a record for it.
 *
 * <p>The poster is taken from the first element with CSS class {@code full-poster};
 * the target file is named after the last path segment of {@code url} with its
 * extension replaced by {@code .jpg}.
 *
 * @param url address of the film page; must contain a {@code '.'} after its last {@code '/'}
 * @throws IndexOutOfBoundsException if the page has no {@code full-poster} element, or if
 *         {@code url} has no {@code '.'} after its last {@code '/'}
 */
@Override
public void browseAndSave(String url) {
    String pageHtml = browser.get(url);
    Document page = Jsoup.parse(pageHtml);
    Element poster = page.getElementsByClass("full-poster").get(0);

    // File name = text between the last '/' and the last '.' of the page URL.
    int nameStart = url.lastIndexOf("/") + 1;
    int nameEnd = url.lastIndexOf(".");
    String posterFileName = url.substring(nameStart, nameEnd) + ".jpg";
    File posterFile = new File(this.saveToDir, posterFileName);

    this.fileDownloader.addFile(poster.attr("src"), posterFile);
    repository.save(new CinemaDocument(posterFile.getAbsolutePath(), this.getName()));
}
private int getLastPage() {
String html = browser.get("http://www.online-life.cc/filmy/zarubezh-filmy/");
String html = browser.get("http://www.online-life.cc/lastnews/");
Document document = Jsoup.parse(html);
Elements elements = document.getElementsByClass("navigation");

View File

@@ -5,34 +5,32 @@
package kinosearch.kinosearch3.cinema.seasonvar;
import kinosearch.kinosearch3.browser.Browser;
import kinosearch.kinosearch3.spider.BaseRepository;
import kinosearch.kinosearch3.spider.CinemaDocument;
import kinosearch.kinosearch3.spider.FileDownloader;
import kinosearch.kinosearch3.spider.ScannerCinema;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.data.mongodb.core.MongoTemplate;
import java.io.File;
public class ScannerImpl implements ScannerCinema {
private Logger logger = LoggerFactory.getLogger(ScannerImpl.class);
private static final String DOMAIN = "http://seasonvar.ru/";
private final FileDownloader fileDownloader;
private final Browser browser;
private final File saveTo;
private int i = 1;
private MongoTemplate mongoTemplate;
private final BaseRepository repository;
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveTo, MongoTemplate mongoTemplate) {
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveTo, BaseRepository repository) {
this.fileDownloader = fileDownloader;
this.browser = browser;
this.saveTo = saveTo;
if (!this.saveTo.mkdirs() && !this.saveTo.exists()) {
throw new IllegalStateException(String.format("dir not found: '%s'", this.saveTo.getAbsolutePath()));
}
this.mongoTemplate = mongoTemplate;
this.repository = repository;
}
@Override
@@ -42,28 +40,25 @@ public class ScannerImpl implements ScannerCinema {
@Override
public void run() {
String html = browser.get("http://seasonvar.ru/");
String html = browser.get(DOMAIN);
Document document = Jsoup.parse(html);
Elements elements = document.getElementsByClass("betterT");
Elements elements = document.getElementsByAttribute("data-tabgr");
elements.stream()
.filter(element -> element.tagName().equals("div"))
.map(element -> element.children().get(0))
.forEach(element -> {
logger.info("element #{}", i++);
String str = element.attr("href");
str = str.substring(1, str.lastIndexOf("."));
.flatMap(element -> element.children().stream())
.map(element -> element.attr("href"))
.forEach(url -> this.browseAndSave(DOMAIN + url));
}
String str2 = element.attr("data");
str2 = browser.get("http://seasonvar.ru"+str2);
Document doc = Jsoup.parse(str2);
str2 = doc.getElementsByTag("img").get(0).attr("src");
@Override
public void browseAndSave(String url) {
Document document = Jsoup.parse(browser.get(url));
Element element = document.getElementsByAttributeValue("itemprop", "thumbnailUrl").get(0);
File saveToFile = new File(this.saveTo, str+".jpg");
this.fileDownloader.addFile(str2, saveToFile);
File saveTo = new File(this.saveTo, url.substring(url.indexOf("/")+1, url.lastIndexOf("."))+".jpg");
this.fileDownloader.addFile(element.attr("src"), saveTo);
CinemaDocument cinemaDocument = new CinemaDocument(saveToFile.getAbsolutePath(), this.getName());
this.mongoTemplate.save(cinemaDocument);
});
CinemaDocument cinemaDocument = new CinemaDocument(saveTo.getAbsolutePath(), this.getName());
this.repository.save(cinemaDocument);
}
}

View File

@@ -0,0 +1,9 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-06
*/
package kinosearch.kinosearch3.spider;
/**
 * Storage abstraction for {@link CinemaDocument} records.
 *
 * <p>Scanners depend on this interface rather than on a concrete database client,
 * so a stub implementation can be supplied in tests.
 */
public interface BaseRepository {
/**
 * Stores the given document.
 *
 * @param cinemaDocument the record to store
 */
void save(CinemaDocument cinemaDocument);
}

View File

@@ -1,116 +1,13 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
* 2017-04-06
*/
package kinosearch.kinosearch3.spider;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.channels.FileLock;
import java.util.AbstractMap;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
public class FileDownloader implements Runnable {
private Logger logger = LoggerFactory.getLogger(FileDownloader.class);
private final BlockingQueue<Map.Entry<String,File>> listOfEntries;
private final int threadCount;
private ThreadGroup threadGroup;
public FileDownloader(int threadCount, int capacity) {
this.threadCount = threadCount;
this.listOfEntries = new ArrayBlockingQueue<>(capacity, true);
}
public void addFile(String url, File saveTo) {
try {
listOfEntries.put(new AbstractMap.SimpleEntry<>(url, saveTo));
} catch (InterruptedException ignore) {
if(logger.isTraceEnabled()) {
logger.trace("cancel add file to queue");
}
}
}
void start() {
this.threadGroup = new ThreadGroup("FileDownloader");
for (int i = 1; i <= this.threadCount; i++) {
if (logger.isDebugEnabled()) {
logger.debug("Start '{}' thread", "Downloader #"+i);
}
(new Thread(threadGroup, this, "Downloader #"+i)).start();
}
}
public void stop() {
if (logger.isTraceEnabled()) {
logger.trace("Stopped threads");
}
this.threadGroup.interrupt();
}
@Override
public void run() {
Logger logger = LoggerFactory.getLogger(getClass());
while (!Thread.currentThread().isInterrupted()) {
Map.Entry<String, File> entry;
try {
entry = listOfEntries.take();
if (logger.isDebugEnabled()) {
logger.debug("take url for file '{}'", entry.getValue().getName());
}
} catch (InterruptedException e) {
if (logger.isTraceEnabled()) {
logger.trace("cancel take file by queue");
}
return;
}
FileLock fileLock = null;
URLConnection urlConnection = null;
try {
FileOutputStream fos = new FileOutputStream(entry.getValue());
fileLock = fos.getChannel().lock();
URL url = new URL(entry.getKey());
urlConnection = url.openConnection();
IOUtils.copy(urlConnection.getInputStream(), fos);
} catch (IOException e) {
logger.error("", e);
} finally {
if (fileLock != null) {
try {
fileLock.release();
} catch (IOException e) {
e.printStackTrace();
}
}
if (urlConnection != null) {
try {
urlConnection.getInputStream().close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (logger.isDebugEnabled()) {
logger.trace("file downloaded: '{}'", entry.getValue().getName());
}
}
}
if (logger.isTraceEnabled()) {
logger.debug("Thred closed");
}
}
/**
 * Accepts requests to download a remote resource into a local file.
 *
 * <p>Extracted as an interface so that scanners can be tested with a stub downloader.
 */
public interface FileDownloader {
/**
 * Requests that the resource at {@code url} be written to {@code saveTo}.
 *
 * @param url    address of the resource to fetch
 * @param saveTo local file that receives the content
 */
void addFile(String url, File saveTo);
/** Starts the downloader so that requested files begin to be processed. */
void start();
/** Stops the downloader. */
void stop();
}

View File

@@ -0,0 +1,119 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
*/
package kinosearch.kinosearch3.spider;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.channels.FileLock;
import java.util.AbstractMap;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
/**
 * {@link FileDownloader} backed by a bounded queue and a fixed number of worker threads.
 *
 * <p>{@link #addFile} enqueues a (url, target file) pair; each worker started by
 * {@link #start()} executes {@link #run()}, taking pairs from the queue and copying
 * the remote content into the target file. {@link #stop()} interrupts the workers.
 */
public class FileDownloaderImpl implements FileDownloader, Runnable {
    private final Logger logger = LoggerFactory.getLogger(FileDownloaderImpl.class);
    private final BlockingQueue<Map.Entry<String, File>> listOfEntries;
    private final int threadCount;
    private ThreadGroup threadGroup;

    /**
     * @param threadCount number of worker threads created by {@link #start()}
     * @param capacity    maximum number of pending downloads held in the (fair) queue
     */
    public FileDownloaderImpl(int threadCount, int capacity) {
        this.threadCount = threadCount;
        this.listOfEntries = new ArrayBlockingQueue<>(capacity, true);
    }

    /**
     * Enqueues a download, blocking while the queue is full.
     *
     * <p>If the calling thread is interrupted while waiting, the request is dropped
     * and the thread's interrupt status is restored so the caller can react to it.
     *
     * @param url    address of the resource to fetch
     * @param saveTo local file that receives the content
     */
    @Override
    public void addFile(String url, File saveTo) {
        try {
            listOfEntries.put(new AbstractMap.SimpleEntry<>(url, saveTo));
        } catch (InterruptedException e) {
            logger.trace("cancel add file to queue");
            // Do not swallow the interrupt: put() cleared the flag when it threw.
            Thread.currentThread().interrupt();
        }
    }

    /** Creates the worker thread group and starts {@code threadCount} workers. */
    @Override
    public void start() {
        this.threadGroup = new ThreadGroup("FileDownloader");
        for (int i = 1; i <= this.threadCount; i++) {
            String threadName = "Downloader #" + i;
            logger.debug("Start '{}' thread", threadName);
            new Thread(threadGroup, this, threadName).start();
        }
    }

    /** Interrupts all worker threads; does nothing if {@link #start()} was never called. */
    @Override
    public void stop() {
        logger.trace("Stopped threads");
        ThreadGroup group = this.threadGroup;
        if (group != null) {
            group.interrupt();
        }
    }

    /**
     * Worker loop: takes queued entries and downloads them until the thread is interrupted.
     */
    @Override
    public void run() {
        while (!Thread.currentThread().isInterrupted()) {
            Map.Entry<String, File> entry;
            try {
                entry = listOfEntries.take();
            } catch (InterruptedException e) {
                logger.trace("cancel take file by queue");
                return;
            }
            logger.debug("take url for file '{}'", entry.getValue().getName());
            download(entry.getKey(), entry.getValue());
        }
        logger.debug("Thred closed");
    }

    /**
     * Copies the content at {@code sourceUrl} into {@code target} while holding a file lock.
     * I/O failures are logged and do not terminate the worker.
     */
    private void download(String sourceUrl, File target) {
        // Resources close in reverse order: the lock is released first, then the
        // output stream (and its channel) is closed, so no descriptor is leaked.
        try (FileOutputStream fos = new FileOutputStream(target);
             FileLock lock = fos.getChannel().lock()) {
            URLConnection urlConnection = new URL(sourceUrl).openConnection();
            try (InputStream in = urlConnection.getInputStream()) {
                IOUtils.copy(in, fos);
            }
            // Logged only when the copy actually completed.
            logger.trace("file downloaded: '{}'", target.getName());
        } catch (IOException e) {
            logger.error("", e);
        }
    }
}

View File

@@ -0,0 +1,20 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-06
*/
package kinosearch.kinosearch3.spider;
import org.springframework.data.mongodb.core.MongoOperations;
/**
 * {@link BaseRepository} implementation that delegates to Spring Data's
 * {@link MongoOperations}.
 */
public class MongoDBRepository implements BaseRepository {
private final MongoOperations mongoOperations;
/**
 * @param mongoOperations the MongoDB operations object every {@link #save} call is forwarded to
 */
public MongoDBRepository(MongoOperations mongoOperations) {
this.mongoOperations = mongoOperations;
}
/**
 * Passes the document unchanged to {@code MongoOperations.save}.
 *
 * @param cinemaDocument the record to store
 */
@Override
public void save(CinemaDocument cinemaDocument) {
mongoOperations.save(cinemaDocument);
}
}

View File

@@ -7,4 +7,5 @@ package kinosearch.kinosearch3.spider;
/**
 * A scanner for one cinema site.
 */
public interface ScannerCinema {
/**
 * @return the scanner's name; implementations pass it to each
 *         {@code CinemaDocument} they create
 */
String getName();
/** Performs a full scan of the site. */
void run();
/**
 * Processes a single page of the site.
 *
 * @param url address of the page to process
 */
void browseAndSave(String url);
}