imdb-spider/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Worker.java

61 lines
1.7 KiB
Java
Raw Normal View History

2019-01-09 17:57:37 +03:00
package ru.bvn13.imdbspider.spider.tasker;
import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
import ru.bvn13.imdbspider.exceptions.extractor.HtmlExtractorException;
import ru.bvn13.imdbspider.exceptions.processor.HtmlProcessorException;
2019-01-09 17:57:37 +03:00
import ru.bvn13.imdbspider.spider.extractor.HtmlExtractor;
import ru.bvn13.imdbspider.spider.processor.HtmlProcessor;
import ru.bvn13.imdbspider.spider.processor.JsoupHtmlProcessor;
2019-01-09 17:57:37 +03:00
import java.util.List;
import java.util.concurrent.*;
/**
* @author boyko_vn at 09.01.2019
*/
public class Worker {
2019-01-09 17:57:37 +03:00
private final String url;
private final List<Task> tasks;
private final HtmlExtractor htmlExtractor;
private final HtmlProcessor htmlProcessor;
private final ExecutorService executor;
public Worker(String url, List<Task> tasks) {
this.url = url;
this.tasks = tasks;
this.htmlExtractor = new HtmlExtractor();
this.htmlProcessor = new JsoupHtmlProcessor();
2019-01-09 17:57:37 +03:00
this.executor = Executors.newCachedThreadPool();
}
public Boolean run() throws HtmlExtractorException {
2019-01-09 17:57:37 +03:00
final String html = htmlExtractor.getHtml(url);
2019-01-09 17:57:37 +03:00
tasks.parallelStream().forEach(task -> {
2019-01-09 17:57:37 +03:00
try {
if (task.getCssSelector() != null && !task.getCssSelector().isEmpty()) {
task.setCssSelectorResult(htmlProcessor.process(html, task.getCssSelector()));
}
if (task.getPostprocess() != null) {
task.getPostprocess().accept(task, html);
}
} catch (HtmlProcessorException e) {
task.setException(new ImdbSpiderException(e));
e.printStackTrace();
2019-01-09 17:57:37 +03:00
}
2019-01-09 17:57:37 +03:00
});
return true;
2019-01-09 17:57:37 +03:00
}
}