0

Наметки сбора информации о видео

http://animevost.org/
This commit is contained in:
2019-01-07 15:57:22 +03:00
parent 21306e1c8d
commit 5a5f671b02
6 changed files with 163 additions and 9 deletions

View File

@@ -76,6 +76,23 @@
<version>${jetty.version}</version>
</dependency>
<!-- BROWSER -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.2</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.3</version>
</dependency>
<!-- COMPONENTS -->
<dependency>
<groupId>org.projectlombok</groupId>

View File

@@ -4,17 +4,23 @@ import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.http.converter.HttpMessageConverter;
import org.springframework.http.converter.json.GsonHttpMessageConverter;
import org.springframework.web.servlet.config.annotation.ContentNegotiationConfigurer;
import org.springframework.web.servlet.config.annotation.EnableWebMvc;
import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;
import java.util.List;
/**
 * Spring MVC configuration for the server.
 *
 * <p>Enables Web MVC, scans the listed packages for components, registers a Gson-based
 * message converter and turns off path-extension based content negotiation.
 */
@Configuration
@ComponentScan({ "ks.server.controllers" })
@ComponentScan({ "ks.server.controllers", "ks.server.browser" })
@EnableWebMvc
public class SpringConfigMVC implements WebMvcConfigurer {
/**
 * Appends a {@link GsonHttpMessageConverter} to the converters used for request and
 * response bodies.
 *
 * @param converters the converter list handed in by Spring MVC; modified in place
 */
@Override
public void configureMessageConverters(List<HttpMessageConverter<?>> converters) {
converters.add(new GsonHttpMessageConverter());
}
/**
 * Disables use of the URL path extension when determining the requested media type.
 *
 * @param configurer the content negotiation configurer handed in by Spring MVC
 */
@Override
public void configureContentNegotiation(ContentNegotiationConfigurer configurer) {
// NOTE(review): presumably needed so that proxied paths ending in an extension
// (e.g. ".html") are still answered as JSON - confirm against the controllers.
configurer.favorPathExtension(false);
}
}

View File

@@ -1,7 +1,5 @@
package ks.server;
import ks.server.controllers.ErrorPageController;
import ks.server.controllers.WebController;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.eclipse.jetty.server.ConnectionFactory;

View File

@@ -0,0 +1,47 @@
package ks.server.browser;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.springframework.context.annotation.Scope;
import org.springframework.context.annotation.ScopedProxyMode;
import org.springframework.stereotype.Component;
import org.springframework.web.context.WebApplicationContext;
import java.io.IOException;
/**
 * {@link Browser} implementation backed by Apache HttpClient.
 *
 * <p>Declared as a request-scoped bean behind a class-based proxy, so the {@code encoding}
 * set on it applies to the current web request only.
 */
@Slf4j
@Component
@Scope(scopeName = WebApplicationContext.SCOPE_REQUEST, proxyMode = ScopedProxyMode.TARGET_CLASS)
public class ApacheBrowser implements Browser {

    /** Name of the charset used to decode response bodies. */
    @Setter
    private String encoding = "UTF-8";

    /**
     * Adds the fixed set of headers sent with every request.
     *
     * @param request the outgoing GET request to decorate
     */
    private void setupHeaders(HttpGet request) {
        request.addHeader("Connection", "close");
        request.addHeader("Accept-Encoding", "deflate");
        request.addHeader("User-Agent", "Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/67.0.3396.87 Mobile Safari/537.36");
    }

    /**
     * Performs an HTTP GET and returns the response body decoded with the configured encoding.
     *
     * <p>I/O failures are logged and reported to the caller as an empty string rather than
     * as an exception. A response without a body also yields an empty string.
     *
     * @param url the address to fetch
     * @return the decoded response body, or {@code ""} when the request failed or had no body
     */
    @Override
    public String get(String url) {
        HttpGet request = new HttpGet(url);
        setupHeaders(request);

        // Both the client and the response hold pooled connections/sockets; closing them
        // deterministically prevents a leak on every call.
        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(request)) {
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                // Body-less responses (e.g. 204) carry no entity at all.
                return "";
            }
            return IOUtils.toString(entity.getContent(), encoding);
        } catch (IOException e) {
            log.warn("Error i/o from GET url \"{}\": {}", url, e.getMessage());
            log.debug("", e);
        }
        return "";
    }
}

View File

@@ -0,0 +1,6 @@
package ks.server.browser;
/**
 * Minimal abstraction over an HTTP client that fetches a resource as text.
 */
public interface Browser {

    /**
     * Sets the character encoding the implementation should work with.
     *
     * @param encoding the charset name, e.g. {@code "UTF-8"}
     */
    void setEncoding(String encoding);

    /**
     * Fetches the resource at the given address.
     *
     * @param url the address to request
     * @return the content obtained for {@code url}, as a string
     */
    String get(String url);
}

View File

@@ -1,21 +1,101 @@
package ks.server.controllers;
import ks.server.browser.Browser;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.RestController;
import javax.servlet.http.HttpServletRequest;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@Controller
@RequestMapping(path = "/", produces = MediaType.APPLICATION_JSON_UTF8_VALUE)
@ResponseBody
@RestController
@RequestMapping(
path = "/",
method = RequestMethod.GET,
produces = MediaType.APPLICATION_JSON_UTF8_VALUE)
public class WebController {
@Autowired
private Browser browser;
@RequestMapping(method = RequestMethod.GET)
/**
 * Returns the part of {@code requestUri} that follows the first occurrence of
 * {@code marker}.
 *
 * @param requestUri the URI to cut
 * @param marker     the delimiter after which the result starts
 * @return the remainder of {@code requestUri} after {@code marker}, or an empty string
 *         when {@code marker} does not occur in {@code requestUri}
 */
private String subpath(String requestUri, String marker) {
    final int markerStart = requestUri.indexOf(marker);
    if (markerStart < 0) {
        // Without this guard the -1 from indexOf would be added to marker.length()
        // and an unrelated tail of the URI would be returned.
        return "";
    }
    return requestUri.substring(markerStart + marker.length());
}
/**
 * Answers requests to the controller's base mapping with a fixed greeting.
 *
 * @return an immutable single-entry map holding the key {@code message}
 */
@RequestMapping
public Map<String, String> index() {
    final Map<String, String> greeting = Collections.singletonMap("message", "hello?");
    return greeting;
}
/**
 * Scrapes basic information about a title from animevost.org.
 *
 * <p>Everything after {@code /info/} in the request URI is appended to
 * {@code http://animevost.org/} to form the page address. The page is fetched through the
 * injected {@link Browser}, parsed with jsoup, and the values are read from the children of
 * the first {@code .infoContent} element.
 *
 * <p>Extraction is best-effort: a key is only added when its source element exists and has
 * the expected shape, so a failed download or a changed page layout produces a partial map
 * instead of an exception.
 *
 * @param request the current servlet request; only its request URI is read
 * @return a mutable map that always contains {@code _uri} and, when available,
 *         {@code title}, {@code poster}, {@code year} (Integer), {@code genre} (list of
 *         strings), {@code type}, {@code countSeries}, {@code director} and
 *         {@code description}
 */
@RequestMapping(path = "/c/animevost/info/**")
public Map<String, Object> animevostInfo(HttpServletRequest request) {
    final String domainUri = "http://animevost.org/";
    final String path = subpath(request.getRequestURI(), "/info/");
    final String cinemaRequestUri = domainUri + path;

    final Map<String, Object> resultMap = new HashMap<>();
    resultMap.put("_uri", cinemaRequestUri);

    final Document htmlDocument = Jsoup.parse(browser.get(cinemaRequestUri));

    // .infoContent
    final Element infoContent = htmlDocument.select(".infoContent").first();
    if (infoContent == null) {
        // Nothing to scrape (download failed or the page has a different layout).
        return resultMap;
    }

    // .infoContent > h3:nth-child(1)
    final Element title = firstMatch(infoContent, "h3:nth-child(1)");
    if (title != null) {
        resultMap.put("title", title.text());
    }

    // .infoContent > div:nth-child(2) > img:nth-child(1)
    final Element poster = firstMatch(infoContent, "div:nth-child(2) > img:nth-child(1)");
    if (poster != null) {
        resultMap.put("poster", domainUri + poster.attr("src"));
    }

    // .infoContent > p:nth-child(3)
    final String year = secondChildText(infoContent, "p:nth-child(3)");
    if (year != null) {
        try {
            resultMap.put("year", Integer.parseInt(year));
        } catch (NumberFormatException ignored) {
            // A non-numeric year is treated like a missing one: the key is omitted.
        }
    }

    // .infoContent > p:nth-child(4)
    final String genre = secondChildText(infoContent, "p:nth-child(4)");
    if (genre != null) {
        resultMap.put("genre", Stream.of(genre.split(","))
                .map(String::trim)
                .collect(Collectors.toList()));
    }

    // .infoContent > p:nth-child(5)
    final String type = secondChildText(infoContent, "p:nth-child(5)");
    if (type != null) {
        resultMap.put("type", type);
    }

    // .infoContent > p:nth-child(6)
    final String countSeries = secondChildText(infoContent, "p:nth-child(6)");
    if (countSeries != null) {
        resultMap.put("countSeries", countSeries);
    }

    // .infoContent > p:nth-child(7) > a:nth-child(2)
    final Element director = firstMatch(infoContent, "p:nth-child(7) > a:nth-child(2)");
    if (director != null) {
        resultMap.put("director", director.text());
    }

    // .infoContent > p:nth-child(8)
    final Element description = firstMatch(infoContent, "p:nth-child(8)");
    if (description != null) {
        // The first child node is skipped; empty fragments and <br> nodes are dropped
        // and the remaining fragments are joined with single spaces.
        resultMap.put("description", description.childNodes().stream()
                .skip(1)
                .map(node -> node.toString().trim())
                .filter(text -> !text.isEmpty() && !text.equalsIgnoreCase("<br>"))
                .collect(Collectors.joining(" ")));
    }

    return resultMap;
}

/**
 * Returns the first element below {@code root} that matches {@code cssQuery}, or
 * {@code null} when nothing matches.
 */
private static Element firstMatch(Element root, String cssQuery) {
    return root.select(cssQuery).first();
}

/**
 * Returns the trimmed string form of the second child node of the first element matching
 * {@code cssQuery}, or {@code null} when there is no such element or it has fewer than two
 * child nodes.
 */
private static String secondChildText(Element root, String cssQuery) {
    final Element holder = firstMatch(root, cssQuery);
    if (holder == null || holder.childNodeSize() < 2) {
        return null;
    }
    // NOTE(review): presumably child 0 is the label element and child 1 the value text -
    // confirm against the live page markup.
    return holder.childNode(1).toString().trim();
}
}