diff --git a/pom.xml b/pom.xml
index 30fccda..ec74edc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,6 +20,7 @@ UTF-8 1.8 1.7.21 + 4.2.5.RELEASE
@@ -29,12 +30,52 @@ slf4j-api ${slf4j.version} + + org.slf4j + jcl-over-slf4j + ${slf4j.version} + org.slf4j slf4j-simple ${slf4j.version} runtime + + + + org.springframework + spring-core + ${spring.version} + + + commons-logging + commons-logging + + + + + org.springframework + spring-context + ${spring.version} + + + + + commons-io + commons-io + 2.5 + + + org.apache.httpcomponents + httpclient + 4.5.2 + + + org.jsoup + jsoup + 1.8.3 +
diff --git a/src/main/java/kinosearch/kinosearch3/Main.java b/src/main/java/kinosearch/kinosearch3/Main.java
deleted file mode 100644
index af85116..0000000
--- a/src/main/java/kinosearch/kinosearch3/Main.java
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * DmitriyMX
- * 2017-04-01
- */
-package kinosearch.kinosearch3;
-
-import org.slf4j.LoggerFactory;
-
-public class Main {
-    public static void main(String[] args) {
-        LoggerFactory.getLogger("main").info("hello");
-    }
-}
diff --git a/src/main/java/kinosearch/kinosearch3/browser/ApacheBrowser.java b/src/main/java/kinosearch/kinosearch3/browser/ApacheBrowser.java
new file mode 100644
index 0000000..9fdafab
--- /dev/null
+++ b/src/main/java/kinosearch/kinosearch3/browser/ApacheBrowser.java
@@ -0,0 +1,64 @@
+/*
+ * DmitriyMX
+ * 2017-04-01
+ */
+package kinosearch.kinosearch3.browser;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.http.HttpEntity;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+
+/**
+ * {@link Browser} implementation that fetches pages with Apache HttpClient.
+ */
+public class ApacheBrowser implements Browser {
+    private static final Logger logger = LoggerFactory.getLogger(ApacheBrowser.class);
+
+    /** Charset name used to decode response bodies; {@code null} means the platform default. */
+    private String encoding;
+
+    @Override
+    public void setEncoding(String encoding) {
+        this.encoding = encoding;
+    }
+
+    /**
+     * Performs an HTTP GET and returns the response body decoded with the configured encoding.
+     *
+     * @param url address to fetch
+     * @return the response body, or an empty string when the request fails or has no body
+     */
+    @Override
+    public String get(String url) {
+        HttpGet request = new HttpGet(url);
+        setupHeaders(request);
+
+        // Client and response both own network resources; try-with-resources closes them on every path.
+        try (CloseableHttpClient client = HttpClients.createDefault();
+             CloseableHttpResponse response = client.execute(request)) {
+            HttpEntity entity = response.getEntity();
+            if (entity == null) {
+                return "";
+            }
+            return IOUtils.toString(entity.getContent(), encoding);
+        } catch (IOException e) {
+            // Keep the "empty string on failure" contract, but leave a trace of what went wrong.
+            logger.warn("GET '{}' failed", url, e);
+            return "";
+        }
+    }
+
+    /** Adds the fixed request headers: no keep-alive, deflate only, a mobile Chrome user agent. */
+    private void setupHeaders(HttpGet request) {
+        request.addHeader("Connection", "close");
+        request.addHeader("Accept-Encoding", "deflate");
+        request.addHeader("User-Agent", "Mozilla/5.0 (Linux; Android 4.2.2; GT-I9505 Build/JDQ39) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.59 Mobile Safari/537.36");
+    }
+}
diff --git a/src/main/java/kinosearch/kinosearch3/browser/Browser.java b/src/main/java/kinosearch/kinosearch3/browser/Browser.java
new file mode 100644
index 0000000..07f57a8
--- /dev/null
+++ b/src/main/java/kinosearch/kinosearch3/browser/Browser.java
@@ -0,0 +1,21 @@
+/*
+ * DmitriyMX
+ * 2017-04-01
+ */
+package kinosearch.kinosearch3.browser;
+
+/**
+ * Minimal page fetcher used by the scanners.
+ */
+public interface Browser {
+    /**
+     * @param encoding charset name used to decode fetched pages
+     */
+    void setEncoding(String encoding);
+
+    /**
+     * @param url address of the page to fetch
+     * @return the page content as text
+     */
+    String get(String url);
+}
diff --git a/src/main/java/kinosearch/kinosearch3/cinema/onlinelife/ScannerImpl.java b/src/main/java/kinosearch/kinosearch3/cinema/onlinelife/ScannerImpl.java
new file mode 100644
index 0000000..9d9c9b2
--- /dev/null
+++ b/src/main/java/kinosearch/kinosearch3/cinema/onlinelife/ScannerImpl.java
@@ -0,0 +1,115 @@
+/*
+ * DmitriyMX
+ * 2017-04-01
+ */
+package kinosearch.kinosearch3.cinema.onlinelife;
+
+import kinosearch.kinosearch3.browser.Browser;
+import kinosearch.kinosearch3.spider.FileDownloader;
+import kinosearch.kinosearch3.spider.ScannerCinema;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+
+/**
+ * Scanner for online-life.cc: walks the catalogue pages and queues every poster image for download.
+ */
+public class ScannerImpl implements ScannerCinema {
+    private static final Logger logger = LoggerFactory.getLogger(ScannerImpl.class);
+    private static final String CATALOG_URL = "http://www.online-life.cc/filmy/zarubezh-filmy/";
+
+    private final FileDownloader fileDownloader;
+    private final Browser browser;
+    private final File saveToDir;
+
+    /**
+     * @param fileDownloader queue that receives the poster download jobs
+     * @param browser        fetcher used to load the catalogue pages
+     * @param saveToDir      directory for the posters; created when missing
+     * @throws IllegalStateException if {@code saveToDir} is not a directory and cannot be created
+     */
+    public ScannerImpl(FileDownloader fileDownloader, Browser browser, File saveToDir) {
+        this.fileDownloader = fileDownloader;
+        this.browser = browser;
+        this.saveToDir = saveToDir;
+        // isDirectory() rather than exists(): a regular file at that path is just as unusable.
+        if (!this.saveToDir.mkdirs() && !this.saveToDir.isDirectory()) {
+            throw new IllegalStateException(String.format("dir not found: '%s'", this.saveToDir.getAbsolutePath()));
+        }
+    }
+
+    /** Visits the catalogue pages from the last one down to the first and queues their posters. */
+    @Override
+    public void run() {
+        for (int page = getLastPage(); page > 0; page--) {
+            logger.info("page #{}...", page);
+            String html = browser.get(CATALOG_URL + "page/" + page + "/");
+            Document document = Jsoup.parse(html);
+            for (Element item : document.getElementsByClass("slider-item")) {
+                enqueuePoster(item);
+            }
+        }
+    }
+
+    /**
+     * Queues the poster of one {@code slider-item}. Expected markup: the item's first child is the
+     * link (href) and that link's first grandchild is the image (src); anything else is skipped.
+     */
+    private void enqueuePoster(Element item) {
+        Element link = firstChild(item);
+        Element image = firstChild(firstChild(link));
+        if (image == null) {
+            logger.warn("unexpected slider-item markup, item skipped");
+            return;
+        }
+
+        // The file name is the last path segment of the link without its extension.
+        String href = link.attr("href");
+        int from = href.lastIndexOf('/') + 1;
+        int to = href.lastIndexOf('.');
+        if (to < from) {
+            logger.warn("cannot derive a file name from '{}', item skipped", href);
+            return;
+        }
+
+        fileDownloader.addFile(image.attr("src"), new File(saveToDir, href.substring(from, to) + ".jpg"));
+    }
+
+    /** Returns the first child element of {@code parent}, or {@code null} when there is none. */
+    private static Element firstChild(Element parent) {
+        if (parent == null || parent.children().isEmpty()) {
+            return null;
+        }
+        return parent.child(0);
+    }
+
+    /**
+     * Reads the number of the last catalogue page from the pager on the first page.
+     *
+     * @return the last page number, or {@code 0} when the pager is missing or malformed
+     */
+    private int getLastPage() {
+        Document document = Jsoup.parse(browser.get(CATALOG_URL));
+        Elements navigation = document.getElementsByClass("navigation");
+        if (navigation.isEmpty() || navigation.get(0).children().size() < 2) {
+            logger.warn("pager not found on '{}'", CATALOG_URL);
+            return 0;
+        }
+
+        // The page number is the last path segment of the second-to-last pager entry.
+        Elements links = navigation.get(0).children();
+        String href = links.get(links.size() - 2).attr("href");
+        String[] parts = href.split("/");
+        try {
+            return Integer.parseInt(parts.length == 0 ? "" : parts[parts.length - 1]);
+        } catch (NumberFormatException e) {
+            logger.warn("unexpected pager link '{}'", href);
+            return 0;
+        }
+    }
+}
diff --git a/src/main/java/kinosearch/kinosearch3/spider/FileDownloader.java b/src/main/java/kinosearch/kinosearch3/spider/FileDownloader.java
new file mode 100644
index 0000000..466597c
--- /dev/null
+++ b/src/main/java/kinosearch/kinosearch3/spider/FileDownloader.java
@@ -0,0 +1,113 @@
+/*
+ * DmitriyMX
+ * 2017-04-01
+ */
+package kinosearch.kinosearch3.spider;
+
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.nio.channels.FileLock;
+import java.util.AbstractMap;
+import java.util.Map;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.BlockingQueue;
+
+/**
+ * Bounded download queue served by a fixed number of worker threads.
+ * Producers call {@link #addFile(String, File)}; each worker takes one entry at a time and
+ * copies the URL content into the target file.
+ */
+public class FileDownloader implements Runnable {
+    private static final Logger logger = LoggerFactory.getLogger(FileDownloader.class);
+
+    /** Pending jobs: source URL paired with the file it is saved to. */
+    private final BlockingQueue<Map.Entry<String, File>> listOfEntries;
+    private final int threadCount;
+    private ThreadGroup threadGroup;
+
+    /**
+     * @param threadCount number of worker threads created by {@link #start()}
+     * @param capacity    maximum number of queued jobs; {@link #addFile} blocks when it is reached
+     */
+    public FileDownloader(int threadCount, int capacity) {
+        this.threadCount = threadCount;
+        this.listOfEntries = new ArrayBlockingQueue<>(capacity, true);
+    }
+
+    /**
+     * Queues a download, waiting for free space when the queue is full.
+     * If the calling thread is interrupted while waiting, the job is dropped and the interrupt
+     * flag is restored for the caller.
+     *
+     * @param url    address of the file to download
+     * @param saveTo destination file
+     */
+    public void addFile(String url, File saveTo) {
+        try {
+            listOfEntries.put(new AbstractMap.SimpleEntry<>(url, saveTo));
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+            logger.trace("cancel add file to queue");
+        }
+    }
+
+    /** Starts the worker threads inside a fresh {@code FileDownloader} thread group. */
+    void start() {
+        this.threadGroup = new ThreadGroup("FileDownloader");
+        for (int i = 1; i <= this.threadCount; i++) {
+            String name = "Downloader #" + i;
+            logger.debug("Start '{}' thread", name);
+            new Thread(threadGroup, this, name).start();
+        }
+    }
+
+    /** Interrupts all worker threads; does nothing when {@link #start()} was never called. */
+    public void stop() {
+        logger.trace("Stopped threads");
+        if (this.threadGroup != null) {
+            this.threadGroup.interrupt();
+        }
+    }
+
+    /** Worker loop: takes jobs from the queue until the thread is interrupted. */
+    @Override
+    public void run() {
+        while (!Thread.currentThread().isInterrupted()) {
+            Map.Entry<String, File> entry;
+            try {
+                entry = listOfEntries.take();
+            } catch (InterruptedException e) {
+                logger.trace("cancel take file by queue");
+                return;
+            }
+
+            logger.debug("take url for file '{}'", entry.getValue().getName());
+            download(entry.getKey(), entry.getValue());
+        }
+
+        logger.trace("Thread closed");
+    }
+
+    /**
+     * Copies the content behind {@code address} into {@code target} while holding a lock on the file.
+     * I/O failures are logged and do not stop the worker.
+     */
+    private void download(String address, File target) {
+        // Resources are closed in reverse order: input stream, file lock, output stream.
+        try (FileOutputStream out = new FileOutputStream(target);
+             FileLock ignored = out.getChannel().lock();
+             InputStream in = new URL(address).openStream()) {
+            IOUtils.copy(in, out);
+            logger.debug("file downloaded: '{}'", target.getName());
+        } catch (IOException e) {
+            logger.error("download of '{}' failed", address, e);
+        }
+    }
+}
diff --git a/src/main/java/kinosearch/kinosearch3/spider/Main.java b/src/main/java/kinosearch/kinosearch3/spider/Main.java
new file mode 100644
index 0000000..09827f1
--- /dev/null
+++ b/src/main/java/kinosearch/kinosearch3/spider/Main.java
@@ -0,0 +1,17 @@
+/*
+ * DmitriyMX
+ * 2017-04-01
+ */
+package kinosearch.kinosearch3.spider;
+
+import org.springframework.context.ApplicationContext;
+import org.springframework.context.support.ClassPathXmlApplicationContext;
+
+/** Entry point: builds the Spring context and starts the {@code spider} bean. */
+public class Main {
+    public static void main(String[] args) {
+        ApplicationContext ctx = new ClassPathXmlApplicationContext("/kinosearch/kinosearch3/spider/spring.xml");
+        Spider spider = ctx.getBean("spider", Spider.class);
+        spider.start();
+    }
+}
diff --git a/src/main/java/kinosearch/kinosearch3/spider/ScannerCinema.java b/src/main/java/kinosearch/kinosearch3/spider/ScannerCinema.java
new file mode 100644
index 0000000..dcaa20e
--- /dev/null
+++ b/src/main/java/kinosearch/kinosearch3/spider/ScannerCinema.java
@@ -0,0 +1,11 @@
+/*
+ * DmitriyMX
+ * 2017-04-01
+ */
+package kinosearch.kinosearch3.spider;
+
+/** A site scanner that {@link Spider} runs once per start. */
+public interface ScannerCinema {
+    /** Scans the site. */
+    void run();
+}
diff --git a/src/main/java/kinosearch/kinosearch3/spider/Spider.java b/src/main/java/kinosearch/kinosearch3/spider/Spider.java
new file mode 100644
index 0000000..618280f
--- /dev/null
+++ b/src/main/java/kinosearch/kinosearch3/spider/Spider.java
@@ -0,0 +1,37 @@
+/*
+ * DmitriyMX
+ * 2017-04-02
+ */
+package kinosearch.kinosearch3.spider;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Starts the download workers and then runs every scanner, one after another, on the calling thread.
+ */
+public class Spider {
+    private final List<ScannerCinema> scanners;
+    private final FileDownloader fileDownloader;
+
+    /**
+     * @param scanners       scanners to run, in order; the list is copied
+     * @param fileDownloader downloader whose workers are started before scanning
+     */
+    public Spider(List<ScannerCinema> scanners, FileDownloader fileDownloader) {
+        // Copy first: unmodifiableList alone is only a view over the caller's (still mutable) list.
+        this.scanners = Collections.unmodifiableList(new ArrayList<>(scanners));
+        this.fileDownloader = fileDownloader;
+    }
+
+    /**
+     * Starts the downloader and runs all scanners.
+     * NOTE(review): {@link FileDownloader#stop()} is never called here, so the worker threads
+     * keep waiting on the queue after the last scanner returns - confirm this is intended.
+     */
+    void start() {
+        fileDownloader.start();
+        scanners.forEach(ScannerCinema::run);
+    }
+}
diff --git a/src/main/resources/kinosearch/kinosearch3/spider/spring.xml b/src/main/resources/kinosearch/kinosearch3/spider/spring.xml
new file mode 100644
index 0000000..ee29be4
--- /dev/null
+++ b/src/main/resources/kinosearch/kinosearch3/spider/spring.xml
@@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
\ No newline at end of file