Подготовка Spider-а к релизу
This commit is contained in:
@@ -13,10 +13,13 @@ import org.jsoup.Jsoup;
|
|||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
public class OnlinelifeScanner implements ScannerCinema {
|
public class OnlinelifeScanner implements ScannerCinema {
|
||||||
|
private final Logger logger = LoggerFactory.getLogger(OnlinelifeScanner.class);
|
||||||
private FileDownloader fileDownloader;
|
private FileDownloader fileDownloader;
|
||||||
private Browser browser;
|
private Browser browser;
|
||||||
private File saveToDir;
|
private File saveToDir;
|
||||||
@@ -47,7 +50,10 @@ public class OnlinelifeScanner implements ScannerCinema {
|
|||||||
.flatMap(element -> element.children().stream())
|
.flatMap(element -> element.children().stream())
|
||||||
.filter(element -> element.tagName().equals("a"))
|
.filter(element -> element.tagName().equals("a"))
|
||||||
.map(element -> element.attr("href"))
|
.map(element -> element.attr("href"))
|
||||||
.forEach(this::browseAndSave);
|
.forEach(url -> {
|
||||||
|
logger.info(url);
|
||||||
|
this.browseAndSave(url);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -13,11 +13,14 @@ import org.jsoup.Jsoup;
|
|||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
public class SeasonvarScanner implements ScannerCinema {
|
public class SeasonvarScanner implements ScannerCinema {
|
||||||
private static final String DOMAIN = "http://seasonvar.ru/";
|
private static final String DOMAIN = "http://seasonvar.ru";
|
||||||
|
private final Logger logger = LoggerFactory.getLogger(SeasonvarScanner.class);
|
||||||
private final FileDownloader fileDownloader;
|
private final FileDownloader fileDownloader;
|
||||||
private final Browser browser;
|
private final Browser browser;
|
||||||
private final File saveTo;
|
private final File saveTo;
|
||||||
@@ -44,7 +47,10 @@ public class SeasonvarScanner implements ScannerCinema {
|
|||||||
elements.stream()
|
elements.stream()
|
||||||
.flatMap(element -> element.children().stream())
|
.flatMap(element -> element.children().stream())
|
||||||
.map(element -> element.attr("href"))
|
.map(element -> element.attr("href"))
|
||||||
.forEach(url -> this.browseAndSave(DOMAIN + url));
|
.forEach(url -> {
|
||||||
|
logger.info(DOMAIN + url);
|
||||||
|
this.browseAndSave(DOMAIN + url);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -52,7 +58,7 @@ public class SeasonvarScanner implements ScannerCinema {
|
|||||||
Document document = Jsoup.parse(browser.get(url));
|
Document document = Jsoup.parse(browser.get(url));
|
||||||
Element element = document.getElementsByAttributeValue("itemprop", "thumbnailUrl").get(0);
|
Element element = document.getElementsByAttributeValue("itemprop", "thumbnailUrl").get(0);
|
||||||
|
|
||||||
File saveTo = new File(this.saveTo, url.substring(url.indexOf("/")+1, url.lastIndexOf("."))+".jpg");
|
File saveTo = new File(this.saveTo, url.substring(url.lastIndexOf("/")+1, url.lastIndexOf("."))+".jpg");
|
||||||
this.fileDownloader.addFile(element.attr("src"), saveTo);
|
this.fileDownloader.addFile(element.attr("src"), saveTo);
|
||||||
|
|
||||||
CinemaDocument cinemaDocument = new CinemaDocument();
|
CinemaDocument cinemaDocument = new CinemaDocument();
|
||||||
|
|||||||
@@ -18,8 +18,24 @@ public class Spider {
|
|||||||
|
|
||||||
void start() {
|
void start() {
|
||||||
fileDownloader.start();
|
fileDownloader.start();
|
||||||
|
|
||||||
|
ThreadGroup threadGroup = new ThreadGroup("Scanners");
|
||||||
for (ScannerCinema scanner : scanners) {
|
for (ScannerCinema scanner : scanners) {
|
||||||
(new Thread(scanner::run, "Scanner " + scanner.getName())).start();
|
(new Thread(threadGroup, scanner::run, "Scanner " + scanner.getName())).start();
|
||||||
|
}
|
||||||
|
|
||||||
|
while (threadGroup.activeCount() > 0) {
|
||||||
|
sleep();
|
||||||
|
}
|
||||||
|
|
||||||
|
fileDownloader.stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void sleep() {
|
||||||
|
try {
|
||||||
|
Thread.sleep(1000L);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
// ignore
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
mongo.host=127.0.0.1
|
mongo.host=127.0.0.1
|
||||||
mongo.port=27017
|
mongo.port=27017
|
||||||
mongo.db=kinosearch
|
mongo.db=kinosearch
|
||||||
|
saveRootDir=R:\\
|
||||||
@@ -43,14 +43,14 @@
|
|||||||
<property name="encoding" value="windows-1251"/>
|
<property name="encoding" value="windows-1251"/>
|
||||||
</bean>
|
</bean>
|
||||||
</constructor-arg>
|
</constructor-arg>
|
||||||
<constructor-arg name="saveToDir" value="file:R:/onlinelife"/>
|
<constructor-arg name="saveToDir" value="file:${saveRootDir}/onlinelife"/>
|
||||||
<constructor-arg name="repository" ref="repository"/>
|
<constructor-arg name="repository" ref="repository"/>
|
||||||
</bean>
|
</bean>
|
||||||
|
|
||||||
<bean id="seasonvarScanner" class="kinosearch.kinosearch3.cinema.SeasonvarScanner">
|
<bean id="seasonvarScanner" class="kinosearch.kinosearch3.cinema.SeasonvarScanner">
|
||||||
<constructor-arg name="fileDownloader" ref="fileDownloader"/>
|
<constructor-arg name="fileDownloader" ref="fileDownloader"/>
|
||||||
<constructor-arg name="browser" ref="browser"/>
|
<constructor-arg name="browser" ref="browser"/>
|
||||||
<constructor-arg name="saveTo" value="file:R:/seasonvar"/>
|
<constructor-arg name="saveTo" value="file:${saveRootDir}/seasonvar"/>
|
||||||
<constructor-arg name="repository" ref="repository"/>
|
<constructor-arg name="repository" ref="repository"/>
|
||||||
</bean>
|
</bean>
|
||||||
|
|
||||||
|
|||||||
13
src/main/resources/log4j2.xml
Normal file
13
src/main/resources/log4j2.xml
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<Configuration>
|
||||||
|
<Appenders>
|
||||||
|
<Console name="Console" target="SYSTEM_OUT">
|
||||||
|
<PatternLayout pattern="%d{HH:mm:ss.SSS} [%-5level] (%t) \{%logger{1.}\} -- %msg%n"/>
|
||||||
|
</Console>
|
||||||
|
</Appenders>
|
||||||
|
<Loggers>
|
||||||
|
<Root level="info">
|
||||||
|
<AppenderRef ref="Console"/>
|
||||||
|
</Root>
|
||||||
|
</Loggers>
|
||||||
|
</Configuration>
|
||||||
@@ -6,7 +6,7 @@
|
|||||||
</Console>
|
</Console>
|
||||||
</Appenders>
|
</Appenders>
|
||||||
<Loggers>
|
<Loggers>
|
||||||
<Root level="info">
|
<Root level="debug">
|
||||||
<AppenderRef ref="Console"/>
|
<AppenderRef ref="Console"/>
|
||||||
</Root>
|
</Root>
|
||||||
</Loggers>
|
</Loggers>
|
||||||
|
|||||||
Reference in New Issue
Block a user