imdb-spider/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Manager.java

74 lines
1.9 KiB
Java
Raw Normal View History

2019-01-09 17:57:37 +03:00
package ru.bvn13.imdbspider.spider.tasker;
import ru.bvn13.imdbspider.exceptions.extractor.HtmlExtractorException;
2019-01-09 17:57:37 +03:00
import java.util.ArrayList;
import java.util.HashMap;
2019-01-09 17:57:37 +03:00
import java.util.List;
import java.util.Map;
import java.util.concurrent.*;
/**
* @author boyko_vn at 09.01.2019
*/
public class Manager {
2019-01-16 17:55:40 +03:00
private boolean isDebug;
private Map<String, String> httpRequestHeaders = new HashMap<>();
2019-01-09 17:57:37 +03:00
2019-01-16 17:55:40 +03:00
public boolean isDebug() {
return isDebug;
}
public void setDebug(boolean debug) {
isDebug = debug;
}
public void addHttpRequestHeader(String key, String value) {
this.httpRequestHeaders.put(key, value);
2019-01-09 17:57:37 +03:00
}
public void processTasks(List<Task> allTasks) {
2019-01-09 17:57:37 +03:00
Map<String, List<Task>> groupedTasks = new ConcurrentHashMap<>(allTasks.size());
for (Task task : allTasks) {
List<Task> filteredTasks = null;
if (groupedTasks.keySet().contains(task.getUrl())) {
filteredTasks = groupedTasks.get(task.getUrl());
} else {
filteredTasks = new ArrayList<>();
groupedTasks.put(task.getUrl(), filteredTasks);
}
filteredTasks.add(task);
}
groupedTasks.entrySet().parallelStream().forEach(stringListEntry -> {
Worker w = new Worker(stringListEntry.getKey(), stringListEntry.getValue());
2019-01-16 17:55:40 +03:00
w.setDebug(isDebug);
2019-01-09 17:57:37 +03:00
try {
w.run(httpRequestHeaders);
} catch (HtmlExtractorException e) {
2019-01-09 17:57:37 +03:00
e.printStackTrace();
}
});
List<Task> nextTasks = new ArrayList<>();
2019-01-09 17:57:37 +03:00
for (Task task : allTasks) {
if (task.hasNextTasks()) {
nextTasks.addAll(task.getNestedTasks());
}
}
if (!nextTasks.isEmpty()) {
processTasks(nextTasks);
}
2019-01-09 17:57:37 +03:00
}
}