diff --git a/src/main/java/kinosearch/kinosearch3/base/CinemaDocument.java b/src/main/java/kinosearch/kinosearch3/base/CinemaDocument.java
index 6da51a2..9492881 100644
--- a/src/main/java/kinosearch/kinosearch3/base/CinemaDocument.java
+++ b/src/main/java/kinosearch/kinosearch3/base/CinemaDocument.java
@@ -10,6 +10,8 @@ import lombok.Setter;
 import org.springframework.data.annotation.Id;
 import org.springframework.data.mongodb.core.mapping.Document;
 
+import java.util.List;
+
 @Document(collection = "cinema")
 @NoArgsConstructor
 public class CinemaDocument {
@@ -30,4 +32,7 @@ public class CinemaDocument {
 
     @Getter @Setter
     private String url;
+
+    @Getter @Setter
+    private List<String> tags;
 }
diff --git a/src/main/java/kinosearch/kinosearch3/cinema/OnlinelifeScanner.java b/src/main/java/kinosearch/kinosearch3/cinema/OnlinelifeScanner.java
index b9a69fe..d7eee02 100644
--- a/src/main/java/kinosearch/kinosearch3/cinema/OnlinelifeScanner.java
+++ b/src/main/java/kinosearch/kinosearch3/cinema/OnlinelifeScanner.java
@@ -17,6 +17,8 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
 
 public class OnlinelifeScanner implements ScannerCinema {
     private final Logger logger = LoggerFactory.getLogger(OnlinelifeScanner.class);
@@ -72,12 +74,20 @@ public class OnlinelifeScanner implements ScannerCinema {
         String pathFile = "onlinelife/"+url.substring(url.lastIndexOf("/")+1, url.lastIndexOf("."))+".jpg";
         this.fileDownloader.addFile(element.attr("src"), new File(this.saveToDir, pathFile));
 
+        element = document.getElementsByClass("film_info").get(0);
+        String title = element.child(0).child(0).text().trim();
+        List<String> tags = new ArrayList<>();
+        tags.add(element.child(1).child(0).text().toLowerCase().trim());
+        element = element.child(2).child(0);
+        element.children().forEach(el -> tags.add(el.text().toLowerCase().trim()));
+
         CinemaDocument cinemaDocument = new CinemaDocument();
-        cinemaDocument.setTitle(document.getElementsByClass("film_info").get(0).child(0).child(0).text().trim());
+        cinemaDocument.setTitle(title);
         cinemaDocument.setDescription(document.getElementsByClass("film-description").get(0).text());
         cinemaDocument.setFileName(pathFile);
         cinemaDocument.setTypeWarez(this.getName());
         cinemaDocument.setUrl(url);
+        cinemaDocument.setTags(tags);
         repository.save(cinemaDocument);
     }
 
diff --git a/src/main/java/kinosearch/kinosearch3/cinema/SeasonvarScanner.java b/src/main/java/kinosearch/kinosearch3/cinema/SeasonvarScanner.java
index 23975ed..88216ed 100644
--- a/src/main/java/kinosearch/kinosearch3/cinema/SeasonvarScanner.java
+++ b/src/main/java/kinosearch/kinosearch3/cinema/SeasonvarScanner.java
@@ -17,6 +17,8 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
 
 public class SeasonvarScanner implements ScannerCinema {
     private static final String DOMAIN = "http://seasonvar.ru";
@@ -62,6 +64,12 @@ public class SeasonvarScanner implements ScannerCinema {
         String pathFile = "seasonvar/"+url.substring(url.lastIndexOf("/")+1, url.lastIndexOf("."))+".jpg";
         this.fileDownloader.addFile(element.attr("src"), new File(this.saveTo, pathFile));
 
+        List<String> tags = new ArrayList<>();
+        tags.add(document.getElementsByClass("pgs-sinfo_list").get(2).child(0).text().trim());
+
+        Elements elements = document.getElementsByClass("pgs-stags").get(0).children();
+        elements.forEach(el -> tags.add(el.child(0).text().toLowerCase().trim()));
+
         CinemaDocument cinemaDocument = new CinemaDocument();
         String title = document.getElementsByClass("pgs-sinfo-title").get(0).text();
         cinemaDocument.setTitle(title.replaceAll("^Сериал ", "").replaceAll(" онлайн$", ""));
@@ -69,6 +77,7 @@ public class SeasonvarScanner implements ScannerCinema {
         cinemaDocument.setFileName(pathFile);
         cinemaDocument.setTypeWarez(this.getName());
         cinemaDocument.setUrl(url);
+        cinemaDocument.setTags(tags);
         this.repository.save(cinemaDocument);
     }
 }
diff --git a/src/test/java/kinosearch/kinosearch3/cinema/AbstractScannerTest.java b/src/test/java/kinosearch/kinosearch3/cinema/AbstractScannerTest.java
index 7fa2388..ce698f9 100644
--- a/src/test/java/kinosearch/kinosearch3/cinema/AbstractScannerTest.java
+++ b/src/test/java/kinosearch/kinosearch3/cinema/AbstractScannerTest.java
@@ -14,12 +14,17 @@ import de.flapdoodle.embed.mongo.config.Net;
 import de.flapdoodle.embed.mongo.config.RuntimeConfigBuilder;
 import de.flapdoodle.embed.mongo.distribution.Version;
 import de.flapdoodle.embed.process.config.IRuntimeConfig;
+import kinosearch.kinosearch3.base.CinemaDocument;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
 public class AbstractScannerTest {
     private static MongodProcess mongodProcess;
 
@@ -52,4 +57,12 @@ public class AbstractScannerTest {
             e.printStackTrace();
         }
     }
+
+    void assertTags(CinemaDocument cinemaDocument, String... tags) {
+        assertNotNull(cinemaDocument.getTags());
+        assertEquals(tags.length, cinemaDocument.getTags().size());
+        for (String tag : tags) {
+            assertTrue("tag '"+tag+"' not found", cinemaDocument.getTags().contains(tag));
+        }
+    }
 }
diff --git a/src/test/java/kinosearch/kinosearch3/cinema/OnlinelifeScannerTest.java b/src/test/java/kinosearch/kinosearch3/cinema/OnlinelifeScannerTest.java
index cdc7e15..9dfa840 100644
--- a/src/test/java/kinosearch/kinosearch3/cinema/OnlinelifeScannerTest.java
+++ b/src/test/java/kinosearch/kinosearch3/cinema/OnlinelifeScannerTest.java
@@ -13,6 +13,10 @@ import org.springframework.data.mongodb.core.MongoOperations;
 import org.springframework.test.context.ContextConfiguration;
 import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
 
+import java.util.List;
+
+import static org.junit.Assert.*;
+
 @RunWith(SpringJUnit4ClassRunner.class)
 @ContextConfiguration("/kinosearch/kinosearch3/spider/test-spring.xml")
 public class OnlinelifeScannerTest extends AbstractScannerTest {
@@ -31,7 +35,14 @@ public class OnlinelifeScannerTest extends AbstractScannerTest {
 
     @Test
     public void browseAndSaveTest() {
-        scanner.browseAndSave("http://www.online-life.cc/76-pol-sekretnyy-materialchik-onlayn.html");
-        Assert.assertEquals(1, mongoOperations.findAll(CinemaDocument.class).size());
+        final String url = "http://www.online-life.cc/76-pol-sekretnyy-materialchik-onlayn.html";
+        scanner.browseAndSave(url);
+        List<CinemaDocument> cinemaDocuments = mongoOperations.findAll(CinemaDocument.class);
+        assertEquals(1, cinemaDocuments.size());
+
+        CinemaDocument cinemaDocument = cinemaDocuments.get(0);
+        assertEquals("Пол: Секретный материальчик (Paul)", cinemaDocument.getTitle());
+        assertEquals(url, cinemaDocument.getUrl());
+        assertTags(cinemaDocument, "2011", "зарубежные фильмы", "комедия", "приключения", "фантастика");
     }
 }
diff --git a/src/test/java/kinosearch/kinosearch3/cinema/SeasonvarScannerTest.java b/src/test/java/kinosearch/kinosearch3/cinema/SeasonvarScannerTest.java
index 78276e2..3093670 100644
--- a/src/test/java/kinosearch/kinosearch3/cinema/SeasonvarScannerTest.java
+++ b/src/test/java/kinosearch/kinosearch3/cinema/SeasonvarScannerTest.java
@@ -13,6 +13,10 @@ import org.springframework.data.mongodb.core.MongoOperations;
 import org.springframework.test.context.ContextConfiguration;
 import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
 
+import java.util.List;
+
+import static org.junit.Assert.*;
+
 @RunWith(SpringJUnit4ClassRunner.class)
 @ContextConfiguration("/kinosearch/kinosearch3/spider/test-spring.xml")
 public class SeasonvarScannerTest extends AbstractScannerTest {
@@ -31,7 +35,15 @@ public class SeasonvarScannerTest extends AbstractScannerTest {
 
     @Test
     public void browseAndSaveTest() {
-        scanner.browseAndSave("http://seasonvar.ru/serial-13451-A_ty_dumal_chto_tvoya_zhena_v_onlajn_igre_na_samom_dele_ne_devushka.html");
-        Assert.assertEquals(1, mongoOperations.findAll(CinemaDocument.class).size());
+        final String url = "http://seasonvar.ru/serial-13451-A_ty_dumal_chto_tvoya_zhena_v_onlajn_igre_na_samom_dele_ne_devushka.html";
+        scanner.browseAndSave(url);
+
+        List<CinemaDocument> cinemaDocuments = mongoOperations.findAll(CinemaDocument.class);
+        assertEquals(1, cinemaDocuments.size());
+
+        CinemaDocument cinemaDocument = cinemaDocuments.get(0);
+        assertEquals("А ты думал, что твоя жена в онлайн игре на самом деле не девушка?/Netoge no Yome wa Onnanoko ja Nai to Omotta?", cinemaDocument.getTitle());
+        assertEquals(url, cinemaDocument.getUrl());
+        assertTags(cinemaDocument, "2016", "школа", "игра", "онлайн игры", "виртуальный мир");
     }
 }