Spider: добавлены теги к фильмам
This commit is contained in:
@@ -10,6 +10,8 @@ import lombok.Setter;
|
||||
import org.springframework.data.annotation.Id;
|
||||
import org.springframework.data.mongodb.core.mapping.Document;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Document(collection = "cinema")
|
||||
@NoArgsConstructor
|
||||
public class CinemaDocument {
|
||||
@@ -30,4 +32,7 @@ public class CinemaDocument {
|
||||
|
||||
@Getter @Setter
|
||||
private String url;
|
||||
|
||||
@Getter @Setter
|
||||
private List<String> tags;
|
||||
}
|
||||
|
||||
@@ -17,6 +17,8 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class OnlinelifeScanner implements ScannerCinema {
|
||||
private final Logger logger = LoggerFactory.getLogger(OnlinelifeScanner.class);
|
||||
@@ -72,12 +74,20 @@ public class OnlinelifeScanner implements ScannerCinema {
|
||||
String pathFile = "onlinelife/"+url.substring(url.lastIndexOf("/")+1, url.lastIndexOf("."))+".jpg";
|
||||
this.fileDownloader.addFile(element.attr("src"), new File(this.saveToDir, pathFile));
|
||||
|
||||
element = document.getElementsByClass("film_info").get(0);
|
||||
String title = element.child(0).child(0).text().trim();
|
||||
List<String> tags = new ArrayList<>();
|
||||
tags.add(element.child(1).child(0).text().toLowerCase().trim());
|
||||
element = element.child(2).child(0);
|
||||
element.children().forEach(el -> tags.add(el.text().toLowerCase().trim()));
|
||||
|
||||
CinemaDocument cinemaDocument = new CinemaDocument();
|
||||
cinemaDocument.setTitle(document.getElementsByClass("film_info").get(0).child(0).child(0).text().trim());
|
||||
cinemaDocument.setTitle(title);
|
||||
cinemaDocument.setDescription(document.getElementsByClass("film-description").get(0).text());
|
||||
cinemaDocument.setFileName(pathFile);
|
||||
cinemaDocument.setTypeWarez(this.getName());
|
||||
cinemaDocument.setUrl(url);
|
||||
cinemaDocument.setTags(tags);
|
||||
repository.save(cinemaDocument);
|
||||
}
|
||||
|
||||
|
||||
@@ -17,6 +17,8 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class SeasonvarScanner implements ScannerCinema {
|
||||
private static final String DOMAIN = "http://seasonvar.ru";
|
||||
@@ -62,6 +64,12 @@ public class SeasonvarScanner implements ScannerCinema {
|
||||
String pathFile = "seasonvar/"+url.substring(url.lastIndexOf("/")+1, url.lastIndexOf("."))+".jpg";
|
||||
this.fileDownloader.addFile(element.attr("src"), new File(this.saveTo, pathFile));
|
||||
|
||||
List<String> tags = new ArrayList<>();
|
||||
tags.add(document.getElementsByClass("pgs-sinfo_list").get(2).child(0).text().trim());
|
||||
|
||||
Elements elements = document.getElementsByClass("pgs-stags").get(0).children();
|
||||
elements.forEach(el -> tags.add(el.child(0).text().toLowerCase().trim()));
|
||||
|
||||
CinemaDocument cinemaDocument = new CinemaDocument();
|
||||
String title = document.getElementsByClass("pgs-sinfo-title").get(0).text();
|
||||
cinemaDocument.setTitle(title.replaceAll("^Сериал ", "").replaceAll(" онлайн$", ""));
|
||||
@@ -69,6 +77,7 @@ public class SeasonvarScanner implements ScannerCinema {
|
||||
cinemaDocument.setFileName(pathFile);
|
||||
cinemaDocument.setTypeWarez(this.getName());
|
||||
cinemaDocument.setUrl(url);
|
||||
cinemaDocument.setTags(tags);
|
||||
this.repository.save(cinemaDocument);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,12 +14,17 @@ import de.flapdoodle.embed.mongo.config.Net;
|
||||
import de.flapdoodle.embed.mongo.config.RuntimeConfigBuilder;
|
||||
import de.flapdoodle.embed.mongo.distribution.Version;
|
||||
import de.flapdoodle.embed.process.config.IRuntimeConfig;
|
||||
import kinosearch.kinosearch3.base.CinemaDocument;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
public class AbstractScannerTest {
|
||||
private static MongodProcess mongodProcess;
|
||||
|
||||
@@ -52,4 +57,12 @@ public class AbstractScannerTest {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Asserts that the given document carries the expected tags.
 *
 * <p>The check passes when the document's tag list is non-null, its size equals
 * the number of expected tags, and every expected tag is contained in it.
 * Order is not checked. Each expected tag is tested for membership on its own,
 * so repeated expected values are not matched one-to-one against the stored list.
 *
 * @param cinemaDocument document whose {@code getTags()} result is inspected
 * @param tags           expected tag values
 */
void assertTags(CinemaDocument cinemaDocument, String... tags) {
|
||||
assertNotNull(cinemaDocument.getTags());
|
||||
// Size check first: together with the membership loop below it rules out extra tags.
assertEquals(tags.length, cinemaDocument.getTags().size());
|
||||
for (String tag : tags) {
|
||||
// The failure message names the expected tag that was not found.
assertTrue("tag '"+tag+"' not found", cinemaDocument.getTags().contains(tag));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,10 @@ import org.springframework.data.mongodb.core.MongoOperations;
|
||||
import org.springframework.test.context.ContextConfiguration;
|
||||
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
@RunWith(SpringJUnit4ClassRunner.class)
|
||||
@ContextConfiguration("/kinosearch/kinosearch3/spider/test-spring.xml")
|
||||
public class OnlinelifeScannerTest extends AbstractScannerTest {
|
||||
@@ -31,7 +35,14 @@ public class OnlinelifeScannerTest extends AbstractScannerTest {
|
||||
|
||||
/**
 * Runs the scanner against one film page URL and checks the stored result:
 * exactly one {@code CinemaDocument} is returned by {@code mongoOperations.findAll},
 * and its title, url and tags match the expected values.
 *
 * <p>NOTE(review): this listing looks like a diff rendered without +/- markers.
 * The first {@code browseAndSave}/{@code Assert.assertEquals} pair is presumably
 * the pre-change version of the statements that follow it; confirm against the
 * actual file before relying on the statement order shown here.
 */
@Test
|
||||
public void browseAndSaveTest() {
|
||||
scanner.browseAndSave("http://www.online-life.cc/76-pol-sekretnyy-materialchik-onlayn.html");
|
||||
Assert.assertEquals(1, mongoOperations.findAll(CinemaDocument.class).size());
|
||||
// The URL is kept in a local so it can be compared with the stored document's url below.
final String url = "http://www.online-life.cc/76-pol-sekretnyy-materialchik-onlayn.html";
|
||||
scanner.browseAndSave(url);
|
||||
List<CinemaDocument> cinemaDocuments = mongoOperations.findAll(CinemaDocument.class);
|
||||
assertEquals(1, cinemaDocuments.size());
|
||||
|
||||
CinemaDocument cinemaDocument = cinemaDocuments.get(0);
|
||||
assertEquals("Пол: Секретный материальчик (Paul)", cinemaDocument.getTitle());
|
||||
assertEquals(url, cinemaDocument.getUrl());
|
||||
// Expected tags are given in lower case.
assertTags(cinemaDocument, "2011", "зарубежные фильмы", "комедия", "приключения", "фантастика");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,10 @@ import org.springframework.data.mongodb.core.MongoOperations;
|
||||
import org.springframework.test.context.ContextConfiguration;
|
||||
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
@RunWith(SpringJUnit4ClassRunner.class)
|
||||
@ContextConfiguration("/kinosearch/kinosearch3/spider/test-spring.xml")
|
||||
public class SeasonvarScannerTest extends AbstractScannerTest {
|
||||
@@ -31,7 +35,15 @@ public class SeasonvarScannerTest extends AbstractScannerTest {
|
||||
|
||||
/**
 * Runs the scanner against one series page URL and checks the stored result:
 * exactly one {@code CinemaDocument} is returned by {@code mongoOperations.findAll},
 * and its title, url and tags match the expected values.
 *
 * <p>NOTE(review): this listing looks like a diff rendered without +/- markers.
 * The first {@code browseAndSave}/{@code Assert.assertEquals} pair is presumably
 * the pre-change version of the statements that follow it; confirm against the
 * actual file before relying on the statement order shown here.
 */
@Test
|
||||
public void browseAndSaveTest() {
|
||||
scanner.browseAndSave("http://seasonvar.ru/serial-13451-A_ty_dumal_chto_tvoya_zhena_v_onlajn_igre_na_samom_dele_ne_devushka.html");
|
||||
Assert.assertEquals(1, mongoOperations.findAll(CinemaDocument.class).size());
|
||||
// The URL is kept in a local so it can be compared with the stored document's url below.
final String url = "http://seasonvar.ru/serial-13451-A_ty_dumal_chto_tvoya_zhena_v_onlajn_igre_na_samom_dele_ne_devushka.html";
|
||||
scanner.browseAndSave(url);
|
||||
|
||||
List<CinemaDocument> cinemaDocuments = mongoOperations.findAll(CinemaDocument.class);
|
||||
assertEquals(1, cinemaDocuments.size());
|
||||
|
||||
CinemaDocument cinemaDocument = cinemaDocuments.get(0);
|
||||
assertEquals("А ты думал, что твоя жена в онлайн игре на самом деле не девушка?/Netoge no Yome wa Onnanoko ja Nai to Omotta?", cinemaDocument.getTitle());
|
||||
assertEquals(url, cinemaDocument.getUrl());
|
||||
// Five expected tags: "2016" plus four descriptive tags.
assertTags(cinemaDocument, "2016", "школа", "игра", "онлайн игры", "виртуальный мир");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user