0

Выделяем код сбора информации в отдельный класс

This commit is contained in:
2019-01-07 16:30:49 +03:00
parent 5a5f671b02
commit 92ac92c000
3 changed files with 81 additions and 65 deletions

View File

@@ -11,7 +11,7 @@ import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;
import java.util.List; import java.util.List;
@Configuration @Configuration
@ComponentScan({ "ks.server.controllers", "ks.server.browser" }) @ComponentScan({ "ks.server.controllers", "ks.server.browser", "ks.server.cinema" })
@EnableWebMvc @EnableWebMvc
public class SpringConfigMVC implements WebMvcConfigurer { public class SpringConfigMVC implements WebMvcConfigurer {
@Override @Override

View File

@@ -0,0 +1,75 @@
package ks.server.cinema;
import ks.server.browser.Browser;
import lombok.Getter;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Holds the details of a single title scraped from an animevost.org page.
 *
 * <p>An instance starts empty; {@link #fillInfo(String, Browser)} downloads the page
 * and populates every field. Getters are generated by Lombok's {@code @Getter}.
 *
 * <p>NOTE(review): the class is registered as a Spring component but keeps per-page
 * mutable state, so a shared bean instance would not be safe to fill from concurrent
 * requests — confirm that callers create their own instance per request.
 */
@Component("animevost")
@Getter
public class Animevost {
    /** Site root; ends with '/' so that relative paths can be appended directly. */
    private static final String DOMAIN_URI = "http://animevost.org/";

    private String title;
    private String poster;
    private Integer year;
    private List<String> genre;
    private String type;
    private String countSeries;
    private String director;
    private String description;

    /**
     * Downloads {@code DOMAIN_URI + uri} through the given browser, parses the HTML and
     * fills all fields of this object from the {@code .infoContent} block of the page.
     *
     * @param uri     page path relative to the site root
     * @param browser used to fetch the page source
     * @throws IllegalStateException if an element expected by the scraper is missing
     * @throws NumberFormatException if the year value is not an integer
     */
    public void fillInfo(String uri, Browser browser) {
        final Document htmlDocument = Jsoup.parse(browser.get(DOMAIN_URI + uri));

        // .infoContent
        final Element infoContent = requireFirst(htmlDocument, ".infoContent");

        // .infoContent > h3:nth-child(1)
        title = requireFirst(infoContent, "h3:nth-child(1)").text();

        // .infoContent > div:nth-child(2) > img:nth-child(1)
        // The first character of src is dropped (presumably a leading '/') because
        // DOMAIN_URI already ends with one.
        poster = DOMAIN_URI
                + requireFirst(infoContent, "div:nth-child(2) > img:nth-child(1)")
                        .attr("src").substring(1);

        // .infoContent > p:nth-child(3)
        year = Integer.parseInt(labelledValue(infoContent, "p:nth-child(3)"));

        // .infoContent > p:nth-child(4) — comma separated list
        genre = Stream.of(labelledValue(infoContent, "p:nth-child(4)").split(","))
                .map(String::trim)
                .collect(Collectors.toList());

        // .infoContent > p:nth-child(5)
        type = labelledValue(infoContent, "p:nth-child(5)");

        // .infoContent > p:nth-child(6)
        countSeries = labelledValue(infoContent, "p:nth-child(6)");

        // .infoContent > p:nth-child(7) > a:nth-child(2)
        director = requireFirst(infoContent, "p:nth-child(7) > a:nth-child(2)").text();

        // .infoContent > p:nth-child(8) — skip the first child node, drop blanks and
        // <br> separators, and join the remaining pieces with single spaces.
        description = requireFirst(infoContent, "p:nth-child(8)").childNodes().stream()
                .skip(1)
                .map(node -> node.toString().trim())
                .filter(text -> !text.isEmpty() && !text.equalsIgnoreCase("<br>"))
                .collect(Collectors.joining(" "));
    }

    /**
     * Returns the first element under {@code root} matching {@code cssQuery}, failing
     * with a descriptive message instead of a bare NullPointerException when none does.
     */
    private static Element requireFirst(Element root, String cssQuery) {
        final Element found = root.select(cssQuery).first();
        if (found == null) {
            throw new IllegalStateException(
                    "No element matching '" + cssQuery + "' found on the Animevost page");
        }
        return found;
    }

    /**
     * Returns the trimmed markup of the second child node of the first element matching
     * {@code cssQuery} (presumably the value that follows a label node in the paragraph).
     */
    private static String labelledValue(Element infoContent, String cssQuery) {
        return requireFirst(infoContent, cssQuery).childNode(1).toString().trim();
    }
}

View File

@@ -1,9 +1,7 @@
package ks.server.controllers; package ks.server.controllers;
import ks.server.browser.Browser; import ks.server.browser.Browser;
import org.jsoup.Jsoup; import ks.server.cinema.Animevost;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType; import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestMapping;
@@ -12,10 +10,7 @@ import org.springframework.web.bind.annotation.RestController;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@RestController @RestController
@RequestMapping( @RequestMapping(
@@ -36,66 +31,12 @@ public class WebController {
} }
@RequestMapping(path = "/c/animevost/info/**") @RequestMapping(path = "/c/animevost/info/**")
public Map<String, Object> animevostInfo(HttpServletRequest request) { public Animevost animevostInfo(HttpServletRequest request) {
final String domainUri = "http://animevost.org/";
final String path = subpath(request.getRequestURI(), "/info/"); final String path = subpath(request.getRequestURI(), "/info/");
final String cinemaRequestUri = domainUri + path;
final Map<String, Object> resultMap = new HashMap<>(); Animevost animevost = new Animevost();
resultMap.put("_uri", cinemaRequestUri); animevost.fillInfo(path, browser);
Document htmlDocument = Jsoup.parse(browser.get(cinemaRequestUri)); return animevost;
// /html/body/section/div/div[1]/div[2]/div[2]
// .infoContent
Element infoContent = htmlDocument.select(".infoContent").first();
// /html/body/section/div/div[1]/div[2]/div[2]/div[1]/h3
// .infoContent > h3:nth-child(1)
resultMap.put("title",
infoContent.select("h3:nth-child(1)").first().text());
// /html/body/section/div/div[1]/div[2]/div[2]/div[1]/div[1]/img
// .infoContent > div:nth-child(2) > img:nth-child(1)
resultMap.put("poster", domainUri +
infoContent.select("div:nth-child(2) > img:nth-child(1)").first().attr("src"));
// /html/body/section/div/div[1]/div[2]/div[2]/div[1]/p[1]
// .infoContent > p:nth-child(3)
resultMap.put("year",
Integer.parseInt(infoContent.select("p:nth-child(3)").first().childNode(1).toString().trim()));
// /html/body/section/div/div[1]/div[2]/div[2]/div[1]/p[2]
// .infoContent > p:nth-child(4)
resultMap.put("genre", Stream.of(
infoContent.select("p:nth-child(4)").first().childNode(1).toString().trim()
.split(","))
.map(String::trim).collect(Collectors.toList()));
// /html/body/section/div/div[1]/div[2]/div[2]/div[1]/p[3]
// .infoContent > p:nth-child(5)
resultMap.put("type",
infoContent.select("p:nth-child(5)").first().childNode(1).toString().trim());
// /html/body/section/div/div[1]/div[2]/div[2]/div[1]/p[4]
// .infoContent > p:nth-child(6)
resultMap.put("countSeries",
infoContent.select("p:nth-child(6)").first().childNode(1).toString().trim());
// /html/body/section/div/div[1]/div[2]/div[2]/div[1]/p[5]/a
// .infoContent > p:nth-child(7) > a:nth-child(2)
resultMap.put("director",
infoContent.select("p:nth-child(7) > a:nth-child(2)").first().text());
// /html/body/section/div/div[1]/div[2]/div[2]/div[1]/p[6]
// .infoContent > p:nth-child(8)
resultMap.put("description",
infoContent.select("p:nth-child(8)").first().childNodes().stream()
.skip(1)
.map(node -> node.toString().trim())
.filter(text -> !text.isEmpty() && !text.equalsIgnoreCase("<br>"))
.collect(Collectors.joining(" ")));
return resultMap;
} }
} }