0

Перепроектируем классы для возможности тестирования

This commit is contained in:
2017-04-07 01:33:10 +03:00
parent 82f26515ef
commit 369ba8aa6c
14 changed files with 436 additions and 160 deletions

View File

@@ -5,6 +5,7 @@
package kinosearch.kinosearch3.cinema.onlinelife;
import kinosearch.kinosearch3.browser.Browser;
import kinosearch.kinosearch3.spider.BaseRepository;
import kinosearch.kinosearch3.spider.CinemaDocument;
import kinosearch.kinosearch3.spider.FileDownloader;
import kinosearch.kinosearch3.spider.ScannerCinema;
@@ -12,27 +13,23 @@ import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.data.mongodb.core.MongoTemplate;
import java.io.File;
public class ScannerImpl implements ScannerCinema {
private Logger logger = LoggerFactory.getLogger(ScannerImpl.class);
private FileDownloader fileDownloader;
private Browser browser;
private File saveToDir;
private MongoTemplate mongoTemplate;
private BaseRepository repository;
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveToDir, MongoTemplate mongoTemplate) {
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveToDir, BaseRepository repository) {
this.fileDownloader = fileDownloader;
this.browser = browser;
this.saveToDir = saveToDir;
if (!this.saveToDir.mkdirs() && !this.saveToDir.exists()) {
throw new IllegalStateException(String.format("dir not found: '%s'", this.saveToDir.getAbsolutePath()));
}
this.mongoTemplate = mongoTemplate;
this.repository = repository;
}
@Override
@@ -45,30 +42,32 @@ public class ScannerImpl implements ScannerCinema {
int lastPage = getLastPage();
for (int page = lastPage; page > 0; page--) {
logger.info("page #{}...", page);
String html = browser.get(String.format("http://www.online-life.cc/filmy/zarubezh-filmy/page/%d/", page));
String html = browser.get(String.format("http://www.online-life.cc/lastnews/page/%d/", page));
Document document = Jsoup.parse(html);
Elements elements = document.getElementsByClass("slider-item");
elements.stream()
.map(element -> element.children().get(0))
.forEach(element -> {
String str = element.attr("href");
str = str.substring(str.lastIndexOf("/") + 1, str.lastIndexOf("."));
String url = element.children().get(0).children().get(0).attr("src");
File saveToFile = new File(saveToDir, str + ".jpg");
this.fileDownloader.addFile(url, saveToFile);
CinemaDocument cinemaDocument = new CinemaDocument(saveToFile.getAbsolutePath(), this.getName());
this.mongoTemplate.save(cinemaDocument);
});
.flatMap(element -> element.children().stream())
.filter(element -> element.tagName().equals("a"))
.map(element -> element.attr("href"))
.forEach(this::browseAndSave);
}
}
/**
 * Opens a single film page, queues its poster for download and stores a record for it.
 *
 * <p>The poster is the first element with class {@code full-poster}. It is saved under
 * {@code saveToDir} using the last path segment of {@code url} with its extension replaced
 * by {@code .jpg}. A page without such an element is logged and skipped, so one unexpected
 * page does not abort a whole scan.
 *
 * @param url absolute URL of the film page
 */
@Override
public void browseAndSave(String url) {
    Document document = Jsoup.parse(browser.get(url));
    Elements posters = document.getElementsByClass("full-poster");
    if (posters.isEmpty()) {
        logger.warn("no 'full-poster' element on page '{}', skipping", url);
        return;
    }
    Element poster = posters.get(0);

    File posterFile = new File(this.saveToDir, posterFileName(url));
    this.fileDownloader.addFile(poster.attr("src"), posterFile);
    repository.save(new CinemaDocument(posterFile.getAbsolutePath(), this.getName()));
}

/**
 * Derives the poster file name from a page URL: the text after the last {@code '/'},
 * minus its extension (if any), plus {@code .jpg}.
 */
private static String posterFileName(String url) {
    String lastSegment = url.substring(url.lastIndexOf('/') + 1);
    int dot = lastSegment.lastIndexOf('.');
    // Only a dot inside the last segment is an extension separator; dots in the host name are not.
    String baseName = dot >= 0 ? lastSegment.substring(0, dot) : lastSegment;
    return baseName + ".jpg";
}
private int getLastPage() {
String html = browser.get("http://www.online-life.cc/filmy/zarubezh-filmy/");
String html = browser.get("http://www.online-life.cc/lastnews/");
Document document = Jsoup.parse(html);
Elements elements = document.getElementsByClass("navigation");

View File

@@ -5,34 +5,32 @@
package kinosearch.kinosearch3.cinema.seasonvar;
import kinosearch.kinosearch3.browser.Browser;
import kinosearch.kinosearch3.spider.BaseRepository;
import kinosearch.kinosearch3.spider.CinemaDocument;
import kinosearch.kinosearch3.spider.FileDownloader;
import kinosearch.kinosearch3.spider.ScannerCinema;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.data.mongodb.core.MongoTemplate;
import java.io.File;
public class ScannerImpl implements ScannerCinema {
private Logger logger = LoggerFactory.getLogger(ScannerImpl.class);
private static final String DOMAIN = "http://seasonvar.ru/";
private final FileDownloader fileDownloader;
private final Browser browser;
private final File saveTo;
private int i = 1;
private MongoTemplate mongoTemplate;
private final BaseRepository repository;
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveTo, MongoTemplate mongoTemplate) {
public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveTo, BaseRepository repository) {
this.fileDownloader = fileDownloader;
this.browser = browser;
this.saveTo = saveTo;
if (!this.saveTo.mkdirs() && !this.saveTo.exists()) {
throw new IllegalStateException(String.format("dir not found: '%s'", this.saveTo.getAbsolutePath()));
}
this.mongoTemplate = mongoTemplate;
this.repository = repository;
}
@Override
@@ -42,28 +40,25 @@ public class ScannerImpl implements ScannerCinema {
@Override
public void run() {
String html = browser.get("http://seasonvar.ru/");
String html = browser.get(DOMAIN);
Document document = Jsoup.parse(html);
Elements elements = document.getElementsByClass("betterT");
Elements elements = document.getElementsByAttribute("data-tabgr");
elements.stream()
.filter(element -> element.tagName().equals("div"))
.map(element -> element.children().get(0))
.forEach(element -> {
logger.info("element #{}", i++);
String str = element.attr("href");
str = str.substring(1, str.lastIndexOf("."));
.flatMap(element -> element.children().stream())
.map(element -> element.attr("href"))
.forEach(url -> this.browseAndSave(DOMAIN + url));
}
String str2 = element.attr("data");
str2 = browser.get("http://seasonvar.ru"+str2);
Document doc = Jsoup.parse(str2);
str2 = doc.getElementsByTag("img").get(0).attr("src");
@Override
public void browseAndSave(String url) {
Document document = Jsoup.parse(browser.get(url));
Element element = document.getElementsByAttributeValue("itemprop", "thumbnailUrl").get(0);
File saveToFile = new File(this.saveTo, str+".jpg");
this.fileDownloader.addFile(str2, saveToFile);
File saveTo = new File(this.saveTo, url.substring(url.indexOf("/")+1, url.lastIndexOf("."))+".jpg");
this.fileDownloader.addFile(element.attr("src"), saveTo);
CinemaDocument cinemaDocument = new CinemaDocument(saveToFile.getAbsolutePath(), this.getName());
this.mongoTemplate.save(cinemaDocument);
});
CinemaDocument cinemaDocument = new CinemaDocument(saveTo.getAbsolutePath(), this.getName());
this.repository.save(cinemaDocument);
}
}

View File

@@ -0,0 +1,9 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-06
*/
package kinosearch.kinosearch3.spider;
/**
 * Storage abstraction for scanned cinema entries.
 *
 * <p>The scanners receive this interface through their constructors instead of a concrete
 * database client.
 */
public interface BaseRepository {
/**
 * Stores the given document.
 *
 * @param cinemaDocument the document to store
 */
void save(CinemaDocument cinemaDocument);
}

View File

@@ -1,116 +1,13 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
* 2017-04-06
*/
package kinosearch.kinosearch3.spider;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.channels.FileLock;
import java.util.AbstractMap;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
public class FileDownloader implements Runnable {
private Logger logger = LoggerFactory.getLogger(FileDownloader.class);
private final BlockingQueue<Map.Entry<String,File>> listOfEntries;
private final int threadCount;
private ThreadGroup threadGroup;
public FileDownloader(int threadCount, int capacity) {
this.threadCount = threadCount;
this.listOfEntries = new ArrayBlockingQueue<>(capacity, true);
}
public void addFile(String url, File saveTo) {
try {
listOfEntries.put(new AbstractMap.SimpleEntry<>(url, saveTo));
} catch (InterruptedException ignore) {
if(logger.isTraceEnabled()) {
logger.trace("cancel add file to queue");
}
}
}
void start() {
this.threadGroup = new ThreadGroup("FileDownloader");
for (int i = 1; i <= this.threadCount; i++) {
if (logger.isDebugEnabled()) {
logger.debug("Start '{}' thread", "Downloader #"+i);
}
(new Thread(threadGroup, this, "Downloader #"+i)).start();
}
}
public void stop() {
if (logger.isTraceEnabled()) {
logger.trace("Stopped threads");
}
this.threadGroup.interrupt();
}
@Override
public void run() {
Logger logger = LoggerFactory.getLogger(getClass());
while (!Thread.currentThread().isInterrupted()) {
Map.Entry<String, File> entry;
try {
entry = listOfEntries.take();
if (logger.isDebugEnabled()) {
logger.debug("take url for file '{}'", entry.getValue().getName());
}
} catch (InterruptedException e) {
if (logger.isTraceEnabled()) {
logger.trace("cancel take file by queue");
}
return;
}
FileLock fileLock = null;
URLConnection urlConnection = null;
try {
FileOutputStream fos = new FileOutputStream(entry.getValue());
fileLock = fos.getChannel().lock();
URL url = new URL(entry.getKey());
urlConnection = url.openConnection();
IOUtils.copy(urlConnection.getInputStream(), fos);
} catch (IOException e) {
logger.error("", e);
} finally {
if (fileLock != null) {
try {
fileLock.release();
} catch (IOException e) {
e.printStackTrace();
}
}
if (urlConnection != null) {
try {
urlConnection.getInputStream().close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (logger.isDebugEnabled()) {
logger.trace("file downloaded: '{}'", entry.getValue().getName());
}
}
}
if (logger.isTraceEnabled()) {
logger.debug("Thred closed");
}
}
/**
 * Accepts download requests for files and controls the lifecycle of whatever performs them.
 *
 * <p>Whether and when a requested download actually happens is up to the implementation.
 */
public interface FileDownloader {
/**
 * Requests that the resource at {@code url} be downloaded into {@code saveTo}.
 *
 * @param url    location of the resource to download
 * @param saveTo file the downloaded content should be written to
 */
void addFile(String url, File saveTo);
/** Starts the downloader. */
void start();
/** Stops the downloader. */
void stop();
}

View File

@@ -0,0 +1,119 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-01
*/
package kinosearch.kinosearch3.spider;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.channels.FileLock;
import java.util.AbstractMap;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
/**
 * {@link FileDownloader} backed by a bounded queue and a fixed number of worker threads.
 *
 * <p>{@link #addFile(String, File)} enqueues a (url, target file) pair and blocks while the
 * queue is full. Each worker started by {@link #start()} repeatedly takes a pair from the queue
 * and copies the URL's content into the target file. {@link #stop()} interrupts the workers.
 */
public class FileDownloaderImpl implements FileDownloader, Runnable {
    private final Logger logger = LoggerFactory.getLogger(FileDownloaderImpl.class);
    private final BlockingQueue<Map.Entry<String, File>> listOfEntries;
    private final int threadCount;
    private ThreadGroup threadGroup;

    /**
     * @param threadCount number of worker threads created by {@link #start()}
     * @param capacity    maximum number of pending downloads held in the queue
     */
    public FileDownloaderImpl(int threadCount, int capacity) {
        this.threadCount = threadCount;
        this.listOfEntries = new ArrayBlockingQueue<>(capacity, true);
    }

    /**
     * Enqueues a download, blocking while the queue is full. If the calling thread is
     * interrupted while waiting, the request is dropped and the interrupt flag is restored.
     */
    @Override
    public void addFile(String url, File saveTo) {
        try {
            listOfEntries.put(new AbstractMap.SimpleEntry<>(url, saveTo));
        } catch (InterruptedException e) {
            // Keep the interruption visible to the caller instead of silently consuming it.
            Thread.currentThread().interrupt();
            logger.trace("cancel add file to queue");
        }
    }

    /** Creates the worker thread group and starts {@code threadCount} worker threads. */
    @Override
    public void start() {
        this.threadGroup = new ThreadGroup("FileDownloader");
        for (int i = 1; i <= this.threadCount; i++) {
            String threadName = "Downloader #" + i;
            logger.debug("Start '{}' thread", threadName);
            new Thread(threadGroup, this, threadName).start();
        }
    }

    /** Interrupts all worker threads; does nothing if {@link #start()} was never called. */
    @Override
    public void stop() {
        logger.trace("Stopped threads");
        if (this.threadGroup != null) {
            this.threadGroup.interrupt();
        }
    }

    /** Worker loop: takes queued entries and downloads them until the thread is interrupted. */
    @Override
    public void run() {
        while (!Thread.currentThread().isInterrupted()) {
            Map.Entry<String, File> entry;
            try {
                entry = listOfEntries.take();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                logger.trace("cancel take file by queue");
                return;
            }
            logger.debug("take url for file '{}'", entry.getValue().getName());
            download(entry.getKey(), entry.getValue());
        }
        logger.trace("Thred closed");
    }

    /**
     * Copies the content behind {@code sourceUrl} into {@code target} while holding a file lock.
     * The stream, the lock and the file are closed in reverse order of opening, also on failure.
     * I/O failures are logged and do not stop the worker.
     */
    private void download(String sourceUrl, File target) {
        try (FileOutputStream fos = new FileOutputStream(target);
             FileLock lock = fos.getChannel().lock()) {
            URLConnection urlConnection = new URL(sourceUrl).openConnection();
            try (java.io.InputStream in = urlConnection.getInputStream()) {
                IOUtils.copy(in, fos);
            }
            logger.debug("file downloaded: '{}'", target.getName());
        } catch (IOException e) {
            logger.error("failed to download '{}' to '{}'", sourceUrl, target, e);
        }
    }
}

View File

@@ -0,0 +1,20 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-06
*/
package kinosearch.kinosearch3.spider;
import org.springframework.data.mongodb.core.MongoOperations;
/**
 * {@link BaseRepository} implementation that forwards every save to Spring Data's
 * {@link MongoOperations}.
 */
public class MongoDBRepository implements BaseRepository {
    /** Mongo facade that performs the actual persistence. */
    private final MongoOperations operations;

    /**
     * @param mongoOperations Mongo operations facade that saves are delegated to
     */
    public MongoDBRepository(MongoOperations mongoOperations) {
        operations = mongoOperations;
    }

    /** Saves the document through the wrapped {@link MongoOperations}. */
    @Override
    public void save(CinemaDocument cinemaDocument) {
        operations.save(cinemaDocument);
    }
}

View File

@@ -7,4 +7,5 @@ package kinosearch.kinosearch3.spider;
/**
 * A scanner for one cinema site.
 */
public interface ScannerCinema {
/**
 * Returns the scanner's name. The ScannerImpl classes in this project pass it to every
 * CinemaDocument they create.
 */
String getName();
/**
 * Runs a scan. The ScannerImpl classes in this project read the site's listing page(s) and
 * call {@link #browseAndSave(String)} for the links they find.
 */
void run();
/**
 * Processes a single page. The ScannerImpl classes in this project fetch the page, queue its
 * poster image for download and save a CinemaDocument for it.
 *
 * @param url URL of the page to process
 */
void browseAndSave(String url);
}

View File

@@ -23,7 +23,11 @@
<constructor-arg name="mongoDbFactory" ref="mongoDbFactory"/>
</bean>
<bean id="fileDownloader" class="kinosearch.kinosearch3.spider.FileDownloader">
<bean id="repository" class="kinosearch.kinosearch3.spider.MongoDBRepository">
<constructor-arg name="mongoOperations" ref="mongoTemplate"/>
</bean>
<bean id="fileDownloader" class="kinosearch.kinosearch3.spider.FileDownloaderImpl">
<constructor-arg name="threadCount" value="3"/>
<constructor-arg name="capacity" value="100"/>
</bean>
@@ -40,14 +44,14 @@
</bean>
</constructor-arg>
<constructor-arg name="saveToDir" value="file:R:/onlinelife"/>
<constructor-arg name="mongoTemplate" ref="mongoTemplate"/>
<constructor-arg name="repository" ref="repository"/>
</bean>
<bean id="seasonvarScanner" class="kinosearch.kinosearch3.cinema.seasonvar.ScannerImpl">
<constructor-arg name="fileDownloader" ref="fileDownloader"/>
<constructor-arg name="browser" ref="browser"/>
<constructor-arg name="saveTo" value="file:R:/seasonvar"/>
<constructor-arg name="mongoTemplate" ref="mongoTemplate"/>
<constructor-arg name="repository" ref="repository"/>
</bean>
<bean id="spider" class="kinosearch.kinosearch3.spider.Spider">

View File

@@ -0,0 +1,50 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-07
*/
package kinosearch.kinosearch3.cinema;
import de.flapdoodle.embed.mongo.Command;
import de.flapdoodle.embed.mongo.MongodExecutable;
import de.flapdoodle.embed.mongo.MongodProcess;
import de.flapdoodle.embed.mongo.MongodStarter;
import de.flapdoodle.embed.mongo.config.IMongodConfig;
import de.flapdoodle.embed.mongo.config.MongodConfigBuilder;
import de.flapdoodle.embed.mongo.config.Net;
import de.flapdoodle.embed.mongo.config.RuntimeConfigBuilder;
import de.flapdoodle.embed.mongo.distribution.Version;
import de.flapdoodle.embed.process.config.IRuntimeConfig;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
 * Base class for scanner tests: starts an embedded MongoDB on 127.0.0.1:27017 once per test
 * class and shuts it down afterwards.
 */
public class AbstractScannerTest {
    // Kept so that the executable (not only the process) can be stopped and cleaned up.
    private static MongodExecutable mongodExecutable;
    private static MongodProcess mongodProcess;

    /**
     * Prepares and starts the embedded mongod.
     *
     * @throws IOException if the mongod process cannot be started
     */
    @BeforeClass
    public static void initBase() throws IOException {
        IRuntimeConfig runtimeConfig = new RuntimeConfigBuilder()
                .defaultsWithLogger(Command.MongoD, LoggerFactory.getLogger("Mongod"))
                .build();
        MongodStarter starter = MongodStarter.getInstance(runtimeConfig);
        IMongodConfig mongodConfig = new MongodConfigBuilder()
                .version(Version.V3_0_5)
                .net(new Net("127.0.0.1", 27017, false))
                .build();
        mongodExecutable = starter.prepare(mongodConfig);
        mongodProcess = mongodExecutable.start();
    }

    /**
     * Stops the embedded mongod. Safe to call even when {@link #initBase()} failed part-way,
     * so a start-up error is not masked by a NullPointerException here.
     */
    @AfterClass
    public static void shutdownBase() {
        if (mongodProcess != null) {
            mongodProcess.stop();
            mongodProcess = null;
        }
        if (mongodExecutable != null) {
            mongodExecutable.stop();
            mongodExecutable = null;
        }
        try {
            // NOTE(review): presumably gives the OS time to release port 27017 before the next
            // test class starts its own mongod; confirm whether this pause is still needed.
            Thread.sleep(5000L);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}

View File

@@ -0,0 +1,23 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-06
*/
package kinosearch.kinosearch3.cinema;
import kinosearch.kinosearch3.spider.FileDownloader;
import java.io.File;
/**
 * {@link FileDownloader} test double whose methods all do nothing, so code under test can
 * request downloads without any file or network activity.
 */
public class FakeFileDownloader implements FileDownloader {
/** Ignores the request; nothing is downloaded or written. */
@Override
public void addFile(String url, File saveTo) {
}
/** Does nothing. */
@Override
public void start() {
}
/** Does nothing. */
@Override
public void stop() {
}
}

View File

@@ -0,0 +1,38 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-07
*/
package kinosearch.kinosearch3.cinema.onlinelife;
import kinosearch.kinosearch3.cinema.AbstractScannerTest;
import kinosearch.kinosearch3.spider.CinemaDocument;
import org.junit.*;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.data.mongodb.core.MongoOperations;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration("/kinosearch/kinosearch3/spider/test-spring.xml")
public class OnlinelifeScannerTest extends AbstractScannerTest {
@Autowired
@Qualifier("onlinelifeScanner")
private ScannerImpl scanner;
@Autowired
@Qualifier("mongoTemplate")
private MongoOperations mongoOperations;
@After
public void resetBase() {
mongoOperations.dropCollection("cinema");
}
@Test
public void browseAndSaveTest() {
scanner.browseAndSave("http://www.online-life.cc/76-pol-sekretnyy-materialchik-onlayn.html");
Assert.assertEquals(1, mongoOperations.findAll(CinemaDocument.class).size());
}
}

View File

@@ -0,0 +1,39 @@
/*
* DmitriyMX <dimon550@gmail.com>
* 2017-04-06
*/
package kinosearch.kinosearch3.cinema.seasonvar;
import com.mongodb.CommandResult;
import kinosearch.kinosearch3.cinema.AbstractScannerTest;
import kinosearch.kinosearch3.spider.CinemaDocument;
import org.junit.*;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.data.mongodb.core.MongoOperations;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration("/kinosearch/kinosearch3/spider/test-spring.xml")
public class SeasonvarScannerTest extends AbstractScannerTest {
@Autowired
@Qualifier("seasonvarScanner")
private ScannerImpl scanner;
@Autowired
@Qualifier("mongoTemplate")
private MongoOperations mongoOperations;
@After
public void resetBase() {
mongoOperations.dropCollection("cinema");
}
@Test
public void browseAndSaveTest() {
scanner.browseAndSave("http://seasonvar.ru/serial-13451-A_ty_dumal_chto_tvoya_zhena_v_onlajn_igre_na_samom_dele_ne_devushka.html");
Assert.assertEquals(1, mongoOperations.findAll(CinemaDocument.class).size());
}
}

View File

@@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Spring context for the scanner tests: real Mongo template and repository, fake file downloader. -->
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:mongo="http://www.springframework.org/schema/data/mongo"
xsi:schemaLocation="http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
http://www.springframework.org/schema/data/mongo
http://www.springframework.org/schema/data/mongo/spring-mongo.xsd">
<!-- Host and port must match the embedded mongod that AbstractScannerTest starts (127.0.0.1:27017). -->
<mongo:mongo id="mongo" host="127.0.0.1" port="27017"/>
<mongo:db-factory id="mongoDbFactory" dbname="fakeBase" mongo-ref="mongo"/>
<bean id="mongoTemplate" class="org.springframework.data.mongodb.core.MongoTemplate">
<constructor-arg name="mongoDbFactory" ref="mongoDbFactory"/>
</bean>
<bean id="repository" class="kinosearch.kinosearch3.spider.MongoDBRepository">
<constructor-arg name="mongoOperations" ref="mongoTemplate"/>
</bean>
<!-- No-op downloader: poster downloads requested by the scanners are ignored in tests. -->
<bean id="fileDownloader" class="kinosearch.kinosearch3.cinema.FakeFileDownloader"/>
<!-- Prototype scope: every bean that references "browser" receives its own instance. -->
<bean id="browser" class="kinosearch.kinosearch3.browser.ApacheBrowser" scope="prototype">
<property name="encoding" value="utf-8"/>
</bean>
<!-- NOTE(review): the scanner constructors create the save directory and throw IllegalStateException
     when they cannot; "R:" is a Windows drive letter, so confirm these paths exist wherever the tests run. -->
<bean id="onlinelifeScanner" class="kinosearch.kinosearch3.cinema.onlinelife.ScannerImpl">
<constructor-arg name="fileDownloader" ref="fileDownloader"/>
<constructor-arg name="browser" ref="browser"/>
<constructor-arg name="saveToDir" value="file:R:/onlinelife"/>
<constructor-arg name="repository" ref="repository"/>
</bean>
<bean id="seasonvarScanner" class="kinosearch.kinosearch3.cinema.seasonvar.ScannerImpl">
<constructor-arg name="fileDownloader" ref="fileDownloader"/>
<constructor-arg name="browser" ref="browser"/>
<constructor-arg name="saveTo" value="file:R:/seasonvar"/>
<constructor-arg name="repository" ref="repository"/>
</bean>
</beans>