Spider: добавлены теги к фильмам
This commit is contained in:
@@ -10,6 +10,8 @@ import lombok.Setter;
|
|||||||
import org.springframework.data.annotation.Id;
|
import org.springframework.data.annotation.Id;
|
||||||
import org.springframework.data.mongodb.core.mapping.Document;
|
import org.springframework.data.mongodb.core.mapping.Document;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Document(collection = "cinema")
|
@Document(collection = "cinema")
|
||||||
@NoArgsConstructor
|
@NoArgsConstructor
|
||||||
public class CinemaDocument {
|
public class CinemaDocument {
|
||||||
@@ -30,4 +32,7 @@ public class CinemaDocument {
|
|||||||
|
|
||||||
@Getter @Setter
|
@Getter @Setter
|
||||||
private String url;
|
private String url;
|
||||||
|
|
||||||
|
@Getter @Setter
|
||||||
|
private List<String> tags;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,6 +17,8 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class OnlinelifeScanner implements ScannerCinema {
|
public class OnlinelifeScanner implements ScannerCinema {
|
||||||
private final Logger logger = LoggerFactory.getLogger(OnlinelifeScanner.class);
|
private final Logger logger = LoggerFactory.getLogger(OnlinelifeScanner.class);
|
||||||
@@ -72,12 +74,20 @@ public class OnlinelifeScanner implements ScannerCinema {
|
|||||||
String pathFile = "onlinelife/"+url.substring(url.lastIndexOf("/")+1, url.lastIndexOf("."))+".jpg";
|
String pathFile = "onlinelife/"+url.substring(url.lastIndexOf("/")+1, url.lastIndexOf("."))+".jpg";
|
||||||
this.fileDownloader.addFile(element.attr("src"), new File(this.saveToDir, pathFile));
|
this.fileDownloader.addFile(element.attr("src"), new File(this.saveToDir, pathFile));
|
||||||
|
|
||||||
|
element = document.getElementsByClass("film_info").get(0);
|
||||||
|
String title = element.child(0).child(0).text().trim();
|
||||||
|
List<String> tags = new ArrayList<>();
|
||||||
|
tags.add(element.child(1).child(0).text().toLowerCase().trim());
|
||||||
|
element = element.child(2).child(0);
|
||||||
|
element.children().forEach(el -> tags.add(el.text().toLowerCase().trim()));
|
||||||
|
|
||||||
CinemaDocument cinemaDocument = new CinemaDocument();
|
CinemaDocument cinemaDocument = new CinemaDocument();
|
||||||
cinemaDocument.setTitle(document.getElementsByClass("film_info").get(0).child(0).child(0).text().trim());
|
cinemaDocument.setTitle(title);
|
||||||
cinemaDocument.setDescription(document.getElementsByClass("film-description").get(0).text());
|
cinemaDocument.setDescription(document.getElementsByClass("film-description").get(0).text());
|
||||||
cinemaDocument.setFileName(pathFile);
|
cinemaDocument.setFileName(pathFile);
|
||||||
cinemaDocument.setTypeWarez(this.getName());
|
cinemaDocument.setTypeWarez(this.getName());
|
||||||
cinemaDocument.setUrl(url);
|
cinemaDocument.setUrl(url);
|
||||||
|
cinemaDocument.setTags(tags);
|
||||||
repository.save(cinemaDocument);
|
repository.save(cinemaDocument);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -17,6 +17,8 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class SeasonvarScanner implements ScannerCinema {
|
public class SeasonvarScanner implements ScannerCinema {
|
||||||
private static final String DOMAIN = "http://seasonvar.ru";
|
private static final String DOMAIN = "http://seasonvar.ru";
|
||||||
@@ -62,6 +64,12 @@ public class SeasonvarScanner implements ScannerCinema {
|
|||||||
String pathFile = "seasonvar/"+url.substring(url.lastIndexOf("/")+1, url.lastIndexOf("."))+".jpg";
|
String pathFile = "seasonvar/"+url.substring(url.lastIndexOf("/")+1, url.lastIndexOf("."))+".jpg";
|
||||||
this.fileDownloader.addFile(element.attr("src"), new File(this.saveTo, pathFile));
|
this.fileDownloader.addFile(element.attr("src"), new File(this.saveTo, pathFile));
|
||||||
|
|
||||||
|
List<String> tags = new ArrayList<>();
|
||||||
|
tags.add(document.getElementsByClass("pgs-sinfo_list").get(2).child(0).text().trim());
|
||||||
|
|
||||||
|
Elements elements = document.getElementsByClass("pgs-stags").get(0).children();
|
||||||
|
elements.forEach(el -> tags.add(el.child(0).text().toLowerCase().trim()));
|
||||||
|
|
||||||
CinemaDocument cinemaDocument = new CinemaDocument();
|
CinemaDocument cinemaDocument = new CinemaDocument();
|
||||||
String title = document.getElementsByClass("pgs-sinfo-title").get(0).text();
|
String title = document.getElementsByClass("pgs-sinfo-title").get(0).text();
|
||||||
cinemaDocument.setTitle(title.replaceAll("^Сериал ", "").replaceAll(" онлайн$", ""));
|
cinemaDocument.setTitle(title.replaceAll("^Сериал ", "").replaceAll(" онлайн$", ""));
|
||||||
@@ -69,6 +77,7 @@ public class SeasonvarScanner implements ScannerCinema {
|
|||||||
cinemaDocument.setFileName(pathFile);
|
cinemaDocument.setFileName(pathFile);
|
||||||
cinemaDocument.setTypeWarez(this.getName());
|
cinemaDocument.setTypeWarez(this.getName());
|
||||||
cinemaDocument.setUrl(url);
|
cinemaDocument.setUrl(url);
|
||||||
|
cinemaDocument.setTags(tags);
|
||||||
this.repository.save(cinemaDocument);
|
this.repository.save(cinemaDocument);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,12 +14,17 @@ import de.flapdoodle.embed.mongo.config.Net;
|
|||||||
import de.flapdoodle.embed.mongo.config.RuntimeConfigBuilder;
|
import de.flapdoodle.embed.mongo.config.RuntimeConfigBuilder;
|
||||||
import de.flapdoodle.embed.mongo.distribution.Version;
|
import de.flapdoodle.embed.mongo.distribution.Version;
|
||||||
import de.flapdoodle.embed.process.config.IRuntimeConfig;
|
import de.flapdoodle.embed.process.config.IRuntimeConfig;
|
||||||
|
import kinosearch.kinosearch3.base.CinemaDocument;
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertNotNull;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
public class AbstractScannerTest {
|
public class AbstractScannerTest {
|
||||||
private static MongodProcess mongodProcess;
|
private static MongodProcess mongodProcess;
|
||||||
|
|
||||||
@@ -52,4 +57,12 @@ public class AbstractScannerTest {
|
|||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void assertTags(CinemaDocument cinemaDocument, String... tags) {
|
||||||
|
assertNotNull(cinemaDocument.getTags());
|
||||||
|
assertEquals(tags.length, cinemaDocument.getTags().size());
|
||||||
|
for (String tag : tags) {
|
||||||
|
assertTrue("tag '"+tag+"' not found", cinemaDocument.getTags().contains(tag));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,6 +13,10 @@ import org.springframework.data.mongodb.core.MongoOperations;
|
|||||||
import org.springframework.test.context.ContextConfiguration;
|
import org.springframework.test.context.ContextConfiguration;
|
||||||
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
|
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
@RunWith(SpringJUnit4ClassRunner.class)
|
@RunWith(SpringJUnit4ClassRunner.class)
|
||||||
@ContextConfiguration("/kinosearch/kinosearch3/spider/test-spring.xml")
|
@ContextConfiguration("/kinosearch/kinosearch3/spider/test-spring.xml")
|
||||||
public class OnlinelifeScannerTest extends AbstractScannerTest {
|
public class OnlinelifeScannerTest extends AbstractScannerTest {
|
||||||
@@ -31,7 +35,14 @@ public class OnlinelifeScannerTest extends AbstractScannerTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void browseAndSaveTest() {
|
public void browseAndSaveTest() {
|
||||||
scanner.browseAndSave("http://www.online-life.cc/76-pol-sekretnyy-materialchik-onlayn.html");
|
final String url = "http://www.online-life.cc/76-pol-sekretnyy-materialchik-onlayn.html";
|
||||||
Assert.assertEquals(1, mongoOperations.findAll(CinemaDocument.class).size());
|
scanner.browseAndSave(url);
|
||||||
|
List<CinemaDocument> cinemaDocuments = mongoOperations.findAll(CinemaDocument.class);
|
||||||
|
assertEquals(1, cinemaDocuments.size());
|
||||||
|
|
||||||
|
CinemaDocument cinemaDocument = cinemaDocuments.get(0);
|
||||||
|
assertEquals("Пол: Секретный материальчик (Paul)", cinemaDocument.getTitle());
|
||||||
|
assertEquals(url, cinemaDocument.getUrl());
|
||||||
|
assertTags(cinemaDocument, "2011", "зарубежные фильмы", "комедия", "приключения", "фантастика");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,6 +13,10 @@ import org.springframework.data.mongodb.core.MongoOperations;
|
|||||||
import org.springframework.test.context.ContextConfiguration;
|
import org.springframework.test.context.ContextConfiguration;
|
||||||
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
|
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
@RunWith(SpringJUnit4ClassRunner.class)
|
@RunWith(SpringJUnit4ClassRunner.class)
|
||||||
@ContextConfiguration("/kinosearch/kinosearch3/spider/test-spring.xml")
|
@ContextConfiguration("/kinosearch/kinosearch3/spider/test-spring.xml")
|
||||||
public class SeasonvarScannerTest extends AbstractScannerTest {
|
public class SeasonvarScannerTest extends AbstractScannerTest {
|
||||||
@@ -31,7 +35,15 @@ public class SeasonvarScannerTest extends AbstractScannerTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void browseAndSaveTest() {
|
public void browseAndSaveTest() {
|
||||||
scanner.browseAndSave("http://seasonvar.ru/serial-13451-A_ty_dumal_chto_tvoya_zhena_v_onlajn_igre_na_samom_dele_ne_devushka.html");
|
final String url = "http://seasonvar.ru/serial-13451-A_ty_dumal_chto_tvoya_zhena_v_onlajn_igre_na_samom_dele_ne_devushka.html";
|
||||||
Assert.assertEquals(1, mongoOperations.findAll(CinemaDocument.class).size());
|
scanner.browseAndSave(url);
|
||||||
|
|
||||||
|
List<CinemaDocument> cinemaDocuments = mongoOperations.findAll(CinemaDocument.class);
|
||||||
|
assertEquals(1, cinemaDocuments.size());
|
||||||
|
|
||||||
|
CinemaDocument cinemaDocument = cinemaDocuments.get(0);
|
||||||
|
assertEquals("А ты думал, что твоя жена в онлайн игре на самом деле не девушка?/Netoge no Yome wa Onnanoko ja Nai to Omotta?", cinemaDocument.getTitle());
|
||||||
|
assertEquals(url, cinemaDocument.getUrl());
|
||||||
|
assertTags(cinemaDocument, "2016", "школа", "игра", "онлайн игры", "виртуальный мир");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user