0

Подготовка Spider-а к релизу

This commit is contained in:
2017-04-07 18:11:50 +03:00
parent 8e4122afe0
commit f502665234
7 changed files with 51 additions and 9 deletions

View File

@@ -13,10 +13,13 @@ import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File; import java.io.File;
public class OnlinelifeScanner implements ScannerCinema { public class OnlinelifeScanner implements ScannerCinema {
private final Logger logger = LoggerFactory.getLogger(OnlinelifeScanner.class);
private FileDownloader fileDownloader; private FileDownloader fileDownloader;
private Browser browser; private Browser browser;
private File saveToDir; private File saveToDir;
@@ -47,7 +50,10 @@ public class OnlinelifeScanner implements ScannerCinema {
.flatMap(element -> element.children().stream()) .flatMap(element -> element.children().stream())
.filter(element -> element.tagName().equals("a")) .filter(element -> element.tagName().equals("a"))
.map(element -> element.attr("href")) .map(element -> element.attr("href"))
.forEach(this::browseAndSave); .forEach(url -> {
logger.info(url);
this.browseAndSave(url);
});
} }
} }

View File

@@ -13,11 +13,14 @@ import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File; import java.io.File;
public class SeasonvarScanner implements ScannerCinema { public class SeasonvarScanner implements ScannerCinema {
private static final String DOMAIN = "http://seasonvar.ru/"; private static final String DOMAIN = "http://seasonvar.ru";
private final Logger logger = LoggerFactory.getLogger(SeasonvarScanner.class);
private final FileDownloader fileDownloader; private final FileDownloader fileDownloader;
private final Browser browser; private final Browser browser;
private final File saveTo; private final File saveTo;
@@ -44,7 +47,10 @@ public class SeasonvarScanner implements ScannerCinema {
elements.stream() elements.stream()
.flatMap(element -> element.children().stream()) .flatMap(element -> element.children().stream())
.map(element -> element.attr("href")) .map(element -> element.attr("href"))
.forEach(url -> this.browseAndSave(DOMAIN + url)); .forEach(url -> {
logger.info(DOMAIN + url);
this.browseAndSave(DOMAIN + url);
});
} }
@Override @Override
@@ -52,7 +58,7 @@ public class SeasonvarScanner implements ScannerCinema {
Document document = Jsoup.parse(browser.get(url)); Document document = Jsoup.parse(browser.get(url));
Element element = document.getElementsByAttributeValue("itemprop", "thumbnailUrl").get(0); Element element = document.getElementsByAttributeValue("itemprop", "thumbnailUrl").get(0);
File saveTo = new File(this.saveTo, url.substring(url.indexOf("/")+1, url.lastIndexOf("."))+".jpg"); File saveTo = new File(this.saveTo, url.substring(url.lastIndexOf("/")+1, url.lastIndexOf("."))+".jpg");
this.fileDownloader.addFile(element.attr("src"), saveTo); this.fileDownloader.addFile(element.attr("src"), saveTo);
CinemaDocument cinemaDocument = new CinemaDocument(); CinemaDocument cinemaDocument = new CinemaDocument();

View File

@@ -18,8 +18,24 @@ public class Spider {
/**
 * Starts the file downloader, launches one thread per scanner, waits until the
 * scanner threads are no longer active, and then stops the file downloader.
 *
 * Rows that show a statement twice carry the old and the new column of the
 * side-by-side diff; this description covers the new (right-hand) version.
 */
void start() { void start() {
fileDownloader.start(); fileDownloader.start();
// Every scanner thread is created in this group so the loop below can count them together.
ThreadGroup threadGroup = new ThreadGroup("Scanners");
for (ScannerCinema scanner : scanners) { for (ScannerCinema scanner : scanners) {
(new Thread(scanner::run, "Scanner " + scanner.getName())).start(); (new Thread(threadGroup, scanner::run, "Scanner " + scanner.getName())).start();
}
// Poll the group until it reports no active threads; sleep() pauses for one second per pass.
// NOTE(review): ThreadGroup.activeCount() is documented as an estimate; keeping the started
// Thread objects and joining them would be a more reliable wait. Confirm before changing.
while (threadGroup.activeCount() > 0) {
sleep();
}
// Reached only after the polling loop has observed zero active scanner threads.
fileDownloader.stop();
}
/**
 * Pauses the calling thread for 1000 milliseconds.
 * An InterruptedException ends the pause early and is otherwise discarded.
 */
private void sleep() {
try {
Thread.sleep(1000L);
} catch (InterruptedException e) {
// Interruption is deliberately ignored; the polling loop in start() simply checks again.
// NOTE(review): the interrupt status is not restored, so start() cannot be cancelled by
// interrupting its thread. Restoring it here without also leaving the polling loop would
// make every later Thread.sleep() throw immediately and turn that loop into a busy spin,
// so change both places together.
} }
} }
} }

View File

@@ -1,3 +1,4 @@
mongo.host=127.0.0.1 mongo.host=127.0.0.1
mongo.port=27017 mongo.port=27017
mongo.db=kinosearch mongo.db=kinosearch
saveRootDir=R:\\

View File

@@ -43,14 +43,14 @@
<property name="encoding" value="windows-1251"/> <property name="encoding" value="windows-1251"/>
</bean> </bean>
</constructor-arg> </constructor-arg>
<constructor-arg name="saveToDir" value="file:R:/onlinelife"/> <constructor-arg name="saveToDir" value="file:${saveRootDir}/onlinelife"/>
<constructor-arg name="repository" ref="repository"/> <constructor-arg name="repository" ref="repository"/>
</bean> </bean>
<bean id="seasonvarScanner" class="kinosearch.kinosearch3.cinema.SeasonvarScanner"> <bean id="seasonvarScanner" class="kinosearch.kinosearch3.cinema.SeasonvarScanner">
<constructor-arg name="fileDownloader" ref="fileDownloader"/> <constructor-arg name="fileDownloader" ref="fileDownloader"/>
<constructor-arg name="browser" ref="browser"/> <constructor-arg name="browser" ref="browser"/>
<constructor-arg name="saveTo" value="file:R:/seasonvar"/> <constructor-arg name="saveTo" value="file:${saveRootDir}/seasonvar"/>
<constructor-arg name="repository" ref="repository"/> <constructor-arg name="repository" ref="repository"/>
</bean> </bean>

View File

@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Logging configuration (element names match the Log4j 2 XML format): one console appender attached to the root logger. -->
<Configuration>
<Appenders>
<!-- Writes log events to standard output (SYSTEM_OUT). -->
<Console name="Console" target="SYSTEM_OUT">
<!-- Pattern fields in order: time as HH:mm:ss.SSS, level in square brackets padded to 5 characters,
     thread name in parentheses, logger name in braces, then the message and a line separator.
     NOTE(review): the {1.} precision presumably abbreviates package segments to one character;
     confirm against the PatternLayout documentation. -->
<PatternLayout pattern="%d{HH:mm:ss.SSS} [%-5level] (%t) \{%logger{1.}\} -- %msg%n"/>
</Console>
</Appenders>
<Loggers>
<!-- Root logger: events at info level and above are routed to the Console appender. -->
<Root level="info">
<AppenderRef ref="Console"/>
</Root>
</Loggers>
</Configuration>

View File

@@ -6,7 +6,7 @@
</Console> </Console>
</Appenders> </Appenders>
<Loggers> <Loggers>
<Root level="info"> <Root level="debug">
<AppenderRef ref="Console"/> <AppenderRef ref="Console"/>
</Root> </Root>
</Loggers> </Loggers>