0

Spider: добавлены теги к фильмам

This commit is contained in:
2017-07-01 22:29:46 +03:00
parent eb029f1156
commit 64971f3f5d
6 changed files with 65 additions and 5 deletions

View File

@@ -10,6 +10,8 @@ import lombok.Setter;
import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;
import java.util.List;
@Document(collection = "cinema")
@NoArgsConstructor
public class CinemaDocument {
@@ -30,4 +32,7 @@ public class CinemaDocument {
@Getter @Setter
private String url;
@Getter @Setter
private List<String> tags;
}

View File

@@ -17,6 +17,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
public class OnlinelifeScanner implements ScannerCinema {
private final Logger logger = LoggerFactory.getLogger(OnlinelifeScanner.class);
@@ -72,12 +74,20 @@ public class OnlinelifeScanner implements ScannerCinema {
String pathFile = "onlinelife/"+url.substring(url.lastIndexOf("/")+1, url.lastIndexOf("."))+".jpg";
this.fileDownloader.addFile(element.attr("src"), new File(this.saveToDir, pathFile));
element = document.getElementsByClass("film_info").get(0);
String title = element.child(0).child(0).text().trim();
List<String> tags = new ArrayList<>();
tags.add(element.child(1).child(0).text().toLowerCase().trim());
element = element.child(2).child(0);
element.children().forEach(el -> tags.add(el.text().toLowerCase().trim()));
CinemaDocument cinemaDocument = new CinemaDocument();
cinemaDocument.setTitle(document.getElementsByClass("film_info").get(0).child(0).child(0).text().trim());
cinemaDocument.setTitle(title);
cinemaDocument.setDescription(document.getElementsByClass("film-description").get(0).text());
cinemaDocument.setFileName(pathFile);
cinemaDocument.setTypeWarez(this.getName());
cinemaDocument.setUrl(url);
cinemaDocument.setTags(tags);
repository.save(cinemaDocument);
}

View File

@@ -17,6 +17,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
public class SeasonvarScanner implements ScannerCinema {
private static final String DOMAIN = "http://seasonvar.ru";
@@ -62,6 +64,12 @@ public class SeasonvarScanner implements ScannerCinema {
String pathFile = "seasonvar/"+url.substring(url.lastIndexOf("/")+1, url.lastIndexOf("."))+".jpg";
this.fileDownloader.addFile(element.attr("src"), new File(this.saveTo, pathFile));
List<String> tags = new ArrayList<>();
tags.add(document.getElementsByClass("pgs-sinfo_list").get(2).child(0).text().trim());
Elements elements = document.getElementsByClass("pgs-stags").get(0).children();
elements.forEach(el -> tags.add(el.child(0).text().toLowerCase().trim()));
CinemaDocument cinemaDocument = new CinemaDocument();
String title = document.getElementsByClass("pgs-sinfo-title").get(0).text();
cinemaDocument.setTitle(title.replaceAll("^Сериал ", "").replaceAll(" онлайн$", ""));
@@ -69,6 +77,7 @@ public class SeasonvarScanner implements ScannerCinema {
cinemaDocument.setFileName(pathFile);
cinemaDocument.setTypeWarez(this.getName());
cinemaDocument.setUrl(url);
cinemaDocument.setTags(tags);
this.repository.save(cinemaDocument);
}
}

View File

@@ -14,12 +14,17 @@ import de.flapdoodle.embed.mongo.config.Net;
import de.flapdoodle.embed.mongo.config.RuntimeConfigBuilder;
import de.flapdoodle.embed.mongo.distribution.Version;
import de.flapdoodle.embed.process.config.IRuntimeConfig;
import kinosearch.kinosearch3.base.CinemaDocument;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
public class AbstractScannerTest {
private static MongodProcess mongodProcess;
@@ -52,4 +57,12 @@ public class AbstractScannerTest {
e.printStackTrace();
}
}
/**
 * Asserts that the document carries exactly the expected tags, ignoring order.
 *
 * <p>The check fails when the document has no tag list, when the number of
 * tags differs, when an expected tag is missing, or when the two tag lists
 * differ in how often a tag occurs.
 *
 * @param cinemaDocument document whose {@code getTags()} result is inspected
 * @param tags           the complete set of tags the document is expected to have
 */
void assertTags(CinemaDocument cinemaDocument, String... tags) {
    assertNotNull(cinemaDocument.getTags());
    assertEquals(tags.length, cinemaDocument.getTags().size());
    for (String tag : tags) {
        assertTrue("tag '"+tag+"' not found", cinemaDocument.getTags().contains(tag));
    }

    // Size + contains alone can be fooled by duplicates: expected (a, a, b)
    // would pass against actual [a, b, b]. Comparing sorted copies closes
    // that gap while keeping the assertion independent of tag order.
    java.util.Comparator<String> order =
            java.util.Comparator.nullsFirst(java.util.Comparator.<String>naturalOrder());
    java.util.List<String> expected = new java.util.ArrayList<>(java.util.Arrays.asList(tags));
    java.util.List<String> actual = new java.util.ArrayList<>(cinemaDocument.getTags());
    expected.sort(order);
    actual.sort(order);
    assertEquals("tags differ (order ignored)", expected, actual);
}
}

View File

@@ -13,6 +13,10 @@ import org.springframework.data.mongodb.core.MongoOperations;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import java.util.List;
import static org.junit.Assert.*;
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration("/kinosearch/kinosearch3/spider/test-spring.xml")
public class OnlinelifeScannerTest extends AbstractScannerTest {
@@ -31,7 +35,14 @@ public class OnlinelifeScannerTest extends AbstractScannerTest {
/**
 * Runs the scanner against a single page URL and checks the stored result:
 * exactly one {@code CinemaDocument} is found, and its title, URL and tags
 * match the expected values.
 */
@Test
public void browseAndSaveTest() {
// NOTE(review): the next two statements (literal-URL browseAndSave call and
// Assert.assertEquals) look like the pre-change lines of this diff hunk shown
// beside their replacements below — confirm against the actual file.
scanner.browseAndSave("http://www.online-life.cc/76-pol-sekretnyy-materialchik-onlayn.html");
Assert.assertEquals(1, mongoOperations.findAll(CinemaDocument.class).size());
// The URL is kept in a local so the same value can be compared with the stored document.
final String url = "http://www.online-life.cc/76-pol-sekretnyy-materialchik-onlayn.html";
scanner.browseAndSave(url);
List<CinemaDocument> cinemaDocuments = mongoOperations.findAll(CinemaDocument.class);
assertEquals(1, cinemaDocuments.size());
CinemaDocument cinemaDocument = cinemaDocuments.get(0);
assertEquals("Пол: Секретный материальчик (Paul)", cinemaDocument.getTitle());
assertEquals(url, cinemaDocument.getUrl());
// Expected tags are written in lower case; order does not matter to assertTags.
assertTags(cinemaDocument, "2011", "зарубежные фильмы", "комедия", "приключения", "фантастика");
}
}

View File

@@ -13,6 +13,10 @@ import org.springframework.data.mongodb.core.MongoOperations;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import java.util.List;
import static org.junit.Assert.*;
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration("/kinosearch/kinosearch3/spider/test-spring.xml")
public class SeasonvarScannerTest extends AbstractScannerTest {
@@ -31,7 +35,15 @@ public class SeasonvarScannerTest extends AbstractScannerTest {
/**
 * Runs the scanner against a single series page URL and checks the stored
 * result: exactly one {@code CinemaDocument} is found, and its title, URL and
 * tags match the expected values.
 */
@Test
public void browseAndSaveTest() {
// NOTE(review): the next two statements (literal-URL browseAndSave call and
// Assert.assertEquals) look like the pre-change lines of this diff hunk shown
// beside their replacements below — confirm against the actual file.
scanner.browseAndSave("http://seasonvar.ru/serial-13451-A_ty_dumal_chto_tvoya_zhena_v_onlajn_igre_na_samom_dele_ne_devushka.html");
Assert.assertEquals(1, mongoOperations.findAll(CinemaDocument.class).size());
// The URL is kept in a local so the same value can be compared with the stored document.
final String url = "http://seasonvar.ru/serial-13451-A_ty_dumal_chto_tvoya_zhena_v_onlajn_igre_na_samom_dele_ne_devushka.html";
scanner.browseAndSave(url);
List<CinemaDocument> cinemaDocuments = mongoOperations.findAll(CinemaDocument.class);
assertEquals(1, cinemaDocuments.size());
CinemaDocument cinemaDocument = cinemaDocuments.get(0);
// The expected title has no leading "Сериал " or trailing " онлайн".
assertEquals("А ты думал, что твоя жена в онлайн игре на самом деле не девушка?/Netoge no Yome wa Onnanoko ja Nai to Omotta?", cinemaDocument.getTitle());
assertEquals(url, cinemaDocument.getUrl());
// Order does not matter to assertTags.
assertTags(cinemaDocument, "2016", "школа", "игра", "онлайн игры", "виртуальный мир");
}
}