diff --git a/.gitignore b/.gitignore
index a1c2a23..a043496 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,8 @@
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
+
+#intellij
+*.ipr
+*.iws
+*.iml
diff --git a/core/pom.xml b/core/pom.xml
new file mode 100644
index 0000000..6d37707
--- /dev/null
+++ b/core/pom.xml
@@ -0,0 +1,43 @@
+
+
+
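+    <parent>
+        <artifactId>parent</artifactId>
+        <groupId>ru.bvn13.imdbspider</groupId>
+        <version>1.0-SNAPSHOT</version>
+    </parent>
+
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>imdb-spider-core</artifactId>
+
+    <name>IMDB-SPIDER :: CORE</name>
+
+    <packaging>jar</packaging>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    </properties>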
+
+
+
+
+
+
+
+
+
+
+
+
+
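+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.11</version>
+            <scope>test</scope>
+        </dependency>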
+
+
+
diff --git a/core/src/main/java/module-info.java b/core/src/main/java/module-info.java
new file mode 100644
index 0000000..001f792
--- /dev/null
+++ b/core/src/main/java/module-info.java
@@ -0,0 +1,12 @@
+/**
+ * Module descriptor of the IMDB spider core library.
+ */
+module imdb.spider.core {
+    requires java.xml;
+
+    // NOTE(review): the root package (ru.bvn13.imdbspider) is not exported; its export
+    // line was commented out in the original descriptor -- confirm before exposing it.
+    exports ru.bvn13.imdbspider.exceptions;
+    exports ru.bvn13.imdbspider.imdb;
+    exports ru.bvn13.imdbspider.spider.tasker;
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/ImdbSpider.java b/core/src/main/java/ru/bvn13/imdbspider/ImdbSpider.java
new file mode 100644
index 0000000..ab8b288
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/ImdbSpider.java
@@ -0,0 +1,129 @@
+package ru.bvn13.imdbspider;
+
+import ru.bvn13.imdbspider.exceptions.api.DataTypeNotSupportedException;
+import ru.bvn13.imdbspider.imdb.Movie;
+import ru.bvn13.imdbspider.imdb.MovieDataType;
+import ru.bvn13.imdbspider.spider.api.v1_0.ApiFactory_1_0;
+import ru.bvn13.imdbspider.spider.tasker.Manager;
+import ru.bvn13.imdbspider.spider.tasker.Task;
+import ru.bvn13.imdbspider.spider.api.ApiFactory;
+
+import java.net.URLEncoder;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.concurrent.ExecutionException;
+
+/**
+ * Entry point of the library: turns a search request into scraping tasks and
+ * hands them to the {@link Manager} for execution.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class ImdbSpider {
+
+    private static final String URL_MAIN = "https://www.imdb.com/";
+    private static final String URL_SEARCH_TITLE = "https://www.imdb.com/find?ref_=nv_sr_fn&q={{title}}&s=tt";
+
+    private final Manager manager;
+
+    private final ApiFactory apiFactory;
+
+    /**
+     * Creates a spider backed by {@link ApiFactory_1_0}.
+     *
+     * @return a new spider instance
+     */
+    public static ImdbSpider withApi_1_0() {
+        return new ImdbSpider(new ApiFactory_1_0());
+    }
+
+    /**
+     * @param apiFactory factory used to create one task per requested data type; must not be null
+     * @throws NullPointerException if {@code apiFactory} is null
+     */
+    public ImdbSpider(ApiFactory apiFactory) {
+        // Previously the argument was never stored, so every search failed with a NullPointerException.
+        this.apiFactory = Objects.requireNonNull(apiFactory, "apiFactory");
+        this.manager = new Manager();
+    }
+
+    /**
+     * Searches by title with a result limit of 10.
+     *
+     * @param title title to search for
+     * @return see {@link #searchMovieByTitle(String, int, EnumSet)}
+     */
+    public List<Movie> searchMovieByTitle(String title) {
+        return searchMovieByTitle(title, 10);
+    }
+
+    /**
+     * Searches by title requesting only {@link MovieDataType#TITLE}.
+     *
+     * @param title title to search for
+     * @param maxCount requested result limit
+     * @return see {@link #searchMovieByTitle(String, int, EnumSet)}
+     */
+    public List<Movie> searchMovieByTitle(String title, int maxCount) {
+        return searchMovieByTitle(title, maxCount, EnumSet.of(MovieDataType.TITLE));
+    }
+
+    /**
+     * Builds one task per requested data type against the title-search page and runs them.
+     *
+     * <p>Mapping the processed tasks onto {@link Movie} objects is not implemented yet, so the
+     * returned list is currently always empty (the previous revision returned {@code null});
+     * {@code maxCount} is not used yet either.
+     *
+     * @param title title to search for; URL-encoded as UTF-8 into the query string
+     * @param maxCount requested result limit (currently unused)
+     * @param dataTypes data types to request
+     * @return an empty, unmodifiable list; never {@code null}
+     */
+    public List<Movie> searchMovieByTitle(String title, int maxCount, EnumSet<MovieDataType> dataTypes) {
+
+        String url = URL_SEARCH_TITLE.replace("{{title}}", URLEncoder.encode(title, StandardCharsets.UTF_8));
+
+        List<Task> tasks = new ArrayList<>();
+
+        // An EnumSet iterates in declaration order, same as filtering MovieDataType.values().
+        for (MovieDataType mdt : dataTypes) {
+            try {
+                Task task = apiFactory.taskByDataType(mdt);
+                if (task == null) {
+                    // The factory has nothing to run for this data type.
+                    continue;
+                }
+                if (task.getUrl() == null) {
+                    // Point the task at the search page; without a url it cannot be fetched.
+                    task.setUrl(url);
+                }
+                tasks.add(task);
+            } catch (DataTypeNotSupportedException e) {
+                // Unsupported data types are skipped; the remaining ones are still processed.
+                e.printStackTrace();
+            }
+        }
+
+        if (!tasks.isEmpty()) {
+            try {
+                tasks = manager.processTasks(tasks);
+            } catch (ExecutionException e) {
+                e.printStackTrace();
+            } catch (InterruptedException e) {
+                // Restore the interrupt flag so callers further up can react to it.
+                Thread.currentThread().interrupt();
+                e.printStackTrace();
+            }
+        }
+
+        return Collections.emptyList();
+
+    }
+
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/exceptions/ImdbSpiderException.java b/core/src/main/java/ru/bvn13/imdbspider/exceptions/ImdbSpiderException.java
new file mode 100644
index 0000000..76f20f3
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/exceptions/ImdbSpiderException.java
@@ -0,0 +1,42 @@
+package ru.bvn13.imdbspider.exceptions;
+
+/**
+ * Base checked exception of the library; the constructors mirror those of {@link Exception}.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class ImdbSpiderException extends Exception {
+
+    /** Creates an exception without a message or cause. */
+    public ImdbSpiderException() {
+        super();
+    }
+
+    /** @param message detail message */
+    public ImdbSpiderException(String message) {
+        super(message);
+    }
+
+    /** @param cause underlying cause */
+    public ImdbSpiderException(Throwable cause) {
+        super(cause);
+    }
+
+    /**
+     * @param message detail message
+     * @param cause underlying cause
+     */
+    public ImdbSpiderException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * @param message detail message
+     * @param cause underlying cause
+     * @param enableSuppression whether suppression is enabled
+     * @param writableStackTrace whether the stack trace is writable
+     */
+    public ImdbSpiderException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
+        super(message, cause, enableSuppression, writableStackTrace);
+    }
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/exceptions/api/DataTypeNotSupportedException.java b/core/src/main/java/ru/bvn13/imdbspider/exceptions/api/DataTypeNotSupportedException.java
new file mode 100644
index 0000000..ea0ca6b
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/exceptions/api/DataTypeNotSupportedException.java
@@ -0,0 +1,44 @@
+package ru.bvn13.imdbspider.exceptions.api;
+
+import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
+
+/**
+ * Signals that an API factory has no task for the requested data type.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class DataTypeNotSupportedException extends ImdbSpiderException {
+
+    /** Creates an exception without a message or cause. */
+    public DataTypeNotSupportedException() {
+        super();
+    }
+
+    /** @param message detail message */
+    public DataTypeNotSupportedException(String message) {
+        super(message);
+    }
+
+    /** @param cause underlying cause */
+    public DataTypeNotSupportedException(Throwable cause) {
+        super(cause);
+    }
+
+    /**
+     * @param message detail message
+     * @param cause underlying cause
+     */
+    public DataTypeNotSupportedException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * @param message detail message
+     * @param cause underlying cause
+     * @param enableSuppression whether suppression is enabled
+     * @param writableStackTrace whether the stack trace is writable
+     */
+    public DataTypeNotSupportedException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
+        super(message, cause, enableSuppression, writableStackTrace);
+    }
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/exceptions/extractor/ConnectionEstablishingException.java b/core/src/main/java/ru/bvn13/imdbspider/exceptions/extractor/ConnectionEstablishingException.java
new file mode 100644
index 0000000..33cc838
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/exceptions/extractor/ConnectionEstablishingException.java
@@ -0,0 +1,42 @@
+package ru.bvn13.imdbspider.exceptions.extractor;
+
+/**
+ * Signals that a connection could not be opened, configured or read.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class ConnectionEstablishingException extends HtmlExtractorException {
+
+    /** Creates an exception without a message or cause. */
+    public ConnectionEstablishingException() {
+        super();
+    }
+
+    /** @param message detail message */
+    public ConnectionEstablishingException(String message) {
+        super(message);
+    }
+
+    /** @param cause underlying cause */
+    public ConnectionEstablishingException(Throwable cause) {
+        super(cause);
+    }
+
+    /**
+     * @param message detail message
+     * @param cause underlying cause
+     */
+    public ConnectionEstablishingException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * @param message detail message
+     * @param cause underlying cause
+     * @param enableSuppression whether suppression is enabled
+     * @param writableStackTrace whether the stack trace is writable
+     */
+    public ConnectionEstablishingException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
+        super(message, cause, enableSuppression, writableStackTrace);
+    }
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/exceptions/extractor/HtmlExtractorException.java b/core/src/main/java/ru/bvn13/imdbspider/exceptions/extractor/HtmlExtractorException.java
new file mode 100644
index 0000000..4d826b8
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/exceptions/extractor/HtmlExtractorException.java
@@ -0,0 +1,44 @@
+package ru.bvn13.imdbspider.exceptions.extractor;
+
+import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
+
+/**
+ * Base exception for failures while downloading a page.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class HtmlExtractorException extends ImdbSpiderException {
+
+    /** Creates an exception without a message or cause. */
+    public HtmlExtractorException() {
+        super();
+    }
+
+    /** @param message detail message */
+    public HtmlExtractorException(String message) {
+        super(message);
+    }
+
+    /** @param cause underlying cause */
+    public HtmlExtractorException(Throwable cause) {
+        super(cause);
+    }
+
+    /**
+     * @param message detail message
+     * @param cause underlying cause
+     */
+    public HtmlExtractorException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * @param message detail message
+     * @param cause underlying cause
+     * @param enableSuppression whether suppression is enabled
+     * @param writableStackTrace whether the stack trace is writable
+     */
+    public HtmlExtractorException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
+        super(message, cause, enableSuppression, writableStackTrace);
+    }
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/exceptions/extractor/MalformedUrlException.java b/core/src/main/java/ru/bvn13/imdbspider/exceptions/extractor/MalformedUrlException.java
new file mode 100644
index 0000000..90fd899
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/exceptions/extractor/MalformedUrlException.java
@@ -0,0 +1,42 @@
+package ru.bvn13.imdbspider.exceptions.extractor;
+
+/**
+ * Signals that a url string could not be parsed.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class MalformedUrlException extends HtmlExtractorException {
+
+    /** Creates an exception without a message or cause. */
+    public MalformedUrlException() {
+        super();
+    }
+
+    /** @param message detail message */
+    public MalformedUrlException(String message) {
+        super(message);
+    }
+
+    /** @param cause underlying cause */
+    public MalformedUrlException(Throwable cause) {
+        super(cause);
+    }
+
+    /**
+     * @param message detail message
+     * @param cause underlying cause
+     */
+    public MalformedUrlException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * @param message detail message
+     * @param cause underlying cause
+     * @param enableSuppression whether suppression is enabled
+     * @param writableStackTrace whether the stack trace is writable
+     */
+    public MalformedUrlException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
+        super(message, cause, enableSuppression, writableStackTrace);
+    }
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/exceptions/processor/HtmlProcessorException.java b/core/src/main/java/ru/bvn13/imdbspider/exceptions/processor/HtmlProcessorException.java
new file mode 100644
index 0000000..6912140
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/exceptions/processor/HtmlProcessorException.java
@@ -0,0 +1,44 @@
+package ru.bvn13.imdbspider.exceptions.processor;
+
+import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
+
+/**
+ * Base exception for failures while parsing markup or evaluating a pattern.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class HtmlProcessorException extends ImdbSpiderException {
+
+    /** Creates an exception without a message or cause. */
+    public HtmlProcessorException() {
+        super();
+    }
+
+    /** @param message detail message */
+    public HtmlProcessorException(String message) {
+        super(message);
+    }
+
+    /** @param cause underlying cause */
+    public HtmlProcessorException(Throwable cause) {
+        super(cause);
+    }
+
+    /**
+     * @param message detail message
+     * @param cause underlying cause
+     */
+    public HtmlProcessorException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * @param message detail message
+     * @param cause underlying cause
+     * @param enableSuppression whether suppression is enabled
+     * @param writableStackTrace whether the stack trace is writable
+     */
+    public HtmlProcessorException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
+        super(message, cause, enableSuppression, writableStackTrace);
+    }
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/exceptions/processor/HtmlToXmlConvertionException.java b/core/src/main/java/ru/bvn13/imdbspider/exceptions/processor/HtmlToXmlConvertionException.java
new file mode 100644
index 0000000..ea2ca47
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/exceptions/processor/HtmlToXmlConvertionException.java
@@ -0,0 +1,42 @@
+package ru.bvn13.imdbspider.exceptions.processor;
+
+/**
+ * Signals that markup could not be parsed as XML.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class HtmlToXmlConvertionException extends HtmlProcessorException {
+
+    /** Creates an exception without a message or cause. */
+    public HtmlToXmlConvertionException() {
+        super();
+    }
+
+    /** @param message detail message */
+    public HtmlToXmlConvertionException(String message) {
+        super(message);
+    }
+
+    /** @param cause underlying cause */
+    public HtmlToXmlConvertionException(Throwable cause) {
+        super(cause);
+    }
+
+    /**
+     * @param message detail message
+     * @param cause underlying cause
+     */
+    public HtmlToXmlConvertionException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * @param message detail message
+     * @param cause underlying cause
+     * @param enableSuppression whether suppression is enabled
+     * @param writableStackTrace whether the stack trace is writable
+     */
+    public HtmlToXmlConvertionException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
+        super(message, cause, enableSuppression, writableStackTrace);
+    }
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/exceptions/processor/PatternEvaluationException.java b/core/src/main/java/ru/bvn13/imdbspider/exceptions/processor/PatternEvaluationException.java
new file mode 100644
index 0000000..c66d336
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/exceptions/processor/PatternEvaluationException.java
@@ -0,0 +1,42 @@
+package ru.bvn13.imdbspider.exceptions.processor;
+
+/**
+ * Signals that an XPath pattern could not be evaluated.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class PatternEvaluationException extends HtmlProcessorException {
+
+    /** Creates an exception without a message or cause. */
+    public PatternEvaluationException() {
+        super();
+    }
+
+    /** @param message detail message */
+    public PatternEvaluationException(String message) {
+        super(message);
+    }
+
+    /** @param cause underlying cause */
+    public PatternEvaluationException(Throwable cause) {
+        super(cause);
+    }
+
+    /**
+     * @param message detail message
+     * @param cause underlying cause
+     */
+    public PatternEvaluationException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * @param message detail message
+     * @param cause underlying cause
+     * @param enableSuppression whether suppression is enabled
+     * @param writableStackTrace whether the stack trace is writable
+     */
+    public PatternEvaluationException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
+        super(message, cause, enableSuppression, writableStackTrace);
+    }
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/imdb/DataType.java b/core/src/main/java/ru/bvn13/imdbspider/imdb/DataType.java
new file mode 100644
index 0000000..8e910d1
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/imdb/DataType.java
@@ -0,0 +1,17 @@
+package ru.bvn13.imdbspider.imdb;
+
+/**
+ * A kind of data that can be requested for an IMDB object.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public interface DataType {
+
+    /**
+     * Returns the string value of this data type.
+     *
+     * @return the string value
+     */
+    String get();
+
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/imdb/ImdbObject.java b/core/src/main/java/ru/bvn13/imdbspider/imdb/ImdbObject.java
new file mode 100644
index 0000000..9db0a44
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/imdb/ImdbObject.java
@@ -0,0 +1,32 @@
+package ru.bvn13.imdbspider.imdb;
+
+/**
+ * Base class for scraped IMDB entities; holds a numeric id and the entity url.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class ImdbObject {
+
+    private String url;
+    private int id;
+
+    /** @return the id, {@code 0} until one is set */
+    public int getId() {
+        return this.id;
+    }
+
+    /** @param id the id to store */
+    public void setId(int id) {
+        this.id = id;
+    }
+
+    /** @return the url, {@code null} until one is set */
+    public String getUrl() {
+        return this.url;
+    }
+
+    /** @param url the url to store */
+    public void setUrl(String url) {
+        this.url = url;
+    }
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/imdb/Movie.java b/core/src/main/java/ru/bvn13/imdbspider/imdb/Movie.java
new file mode 100644
index 0000000..4c2b039
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/imdb/Movie.java
@@ -0,0 +1,36 @@
+package ru.bvn13.imdbspider.imdb;
+
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * A movie: its title plus a map of "akas" (presumably alternative titles -- confirm).
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class Movie extends ImdbObject {
+
+    // NOTE(review): the map's type arguments are not visible here (raw Map) -- confirm the key/value types.
+    private Map akas = new ConcurrentHashMap<>(50);
+    private String title;
+
+    /** @return the title, {@code null} until one is set */
+    public String getTitle() {
+        return this.title;
+    }
+
+    /** @param title the title to store */
+    public void setTitle(String title) {
+        this.title = title;
+    }
+
+    /** @return the akas map (the live instance, not a copy) */
+    public Map getAkas() {
+        return this.akas;
+    }
+
+    /** @param akas map that replaces the current one */
+    public void setAkas(Map akas) {
+        this.akas = akas;
+    }
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/imdb/MovieDataType.java b/core/src/main/java/ru/bvn13/imdbspider/imdb/MovieDataType.java
new file mode 100644
index 0000000..7d0b8cb
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/imdb/MovieDataType.java
@@ -0,0 +1,31 @@
+package ru.bvn13.imdbspider.imdb;
+
+import java.util.EnumSet;
+
+/**
+ * Data types that can be requested for a {@link Movie}.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public enum MovieDataType implements DataType {
+
+    TITLE("title")
+
+    ;
+
+    /** Every constant of this enum. */
+    public static final EnumSet<MovieDataType> ALL_DATA = EnumSet.allOf(MovieDataType.class);
+
+    // Assigned once in the constructor, hence final.
+    private final String value;
+
+    MovieDataType(String v) {
+        value = v;
+    }
+
+    /** @return the string value given to the constant, e.g. {@code "title"} for {@link #TITLE} */
+    @Override
+    public String get() {
+        return value;
+    }
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/imdb/MovieList.java b/core/src/main/java/ru/bvn13/imdbspider/imdb/MovieList.java
new file mode 100644
index 0000000..08d3849
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/imdb/MovieList.java
@@ -0,0 +1,9 @@
+package ru.bvn13.imdbspider.imdb;
+
+/**
+ * IMDB object representing a list of movies; it declares no members of its own yet.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class MovieList extends ImdbObject {
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/ApiFactory.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/ApiFactory.java
new file mode 100644
index 0000000..9ccd0ca
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/ApiFactory.java
@@ -0,0 +1,32 @@
+package ru.bvn13.imdbspider.spider.api;
+
+import ru.bvn13.imdbspider.exceptions.api.DataTypeNotSupportedException;
+import ru.bvn13.imdbspider.imdb.DataType;
+import ru.bvn13.imdbspider.imdb.ImdbObject;
+import ru.bvn13.imdbspider.spider.tasker.Task;
+
+/**
+ * Version-specific factory: creates tasks for data types and copies task results into IMDB objects.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public interface ApiFactory {
+
+    /**
+     * Creates the task for the given data type.
+     *
+     * @param dataType requested data type
+     * @return the task for {@code dataType}
+     * @throws DataTypeNotSupportedException if this API version does not support {@code dataType}
+     */
+    Task taskByDataType(DataType dataType) throws DataTypeNotSupportedException;
+
+    /**
+     * Stores the result of {@code task} in {@code imdbObject}.
+     *
+     * @param imdbObject object to fill
+     * @param task task whose result is applied
+     */
+    void fulfillImdbObject(ImdbObject imdbObject, Task task);
+
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java
new file mode 100644
index 0000000..d5da9b2
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java
@@ -0,0 +1,78 @@
+package ru.bvn13.imdbspider.spider.api.v1_0;
+
+import ru.bvn13.imdbspider.exceptions.api.DataTypeNotSupportedException;
+import ru.bvn13.imdbspider.imdb.DataType;
+import ru.bvn13.imdbspider.imdb.ImdbObject;
+import ru.bvn13.imdbspider.imdb.Movie;
+import ru.bvn13.imdbspider.imdb.MovieDataType;
+import ru.bvn13.imdbspider.spider.api.ApiFactory;
+import ru.bvn13.imdbspider.spider.tasker.Task;
+
+/**
+ * {@link ApiFactory} for API v1_0; it only knows {@link MovieDataType}.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class ApiFactory_1_0 implements ApiFactory {
+
+    /**
+     * Creates a task tagged with the given data type.
+     *
+     * @param dataType requested data type
+     * @return a new task whose data type is {@code dataType}; never {@code null}
+     * @throws DataTypeNotSupportedException if {@code dataType} is null, not a {@link MovieDataType},
+     *         or a movie data type this version has no task for
+     */
+    @Override
+    public Task taskByDataType(DataType dataType) throws DataTypeNotSupportedException {
+        if (dataType instanceof MovieDataType) {
+            return taskByMovieDataType((MovieDataType) dataType);
+        }
+        // Guard the null case: dataType.getClass() would otherwise throw a NullPointerException here.
+        String typeName = dataType == null ? "null" : dataType.getClass().getName();
+        throw new DataTypeNotSupportedException(String.format("DataType %s not supported by API v1_0!", typeName));
+    }
+
+    /**
+     * Copies the task result into the object when the object is a {@link Movie} and the task
+     * carries a {@link MovieDataType}; any other combination is ignored.
+     *
+     * @param imdbObject object to fill
+     * @param task task whose result is applied
+     */
+    @Override
+    public void fulfillImdbObject(ImdbObject imdbObject, Task task) {
+        if (imdbObject instanceof Movie && task != null && task.getDataType() instanceof MovieDataType) {
+            fulfillMovie((Movie) imdbObject, task);
+        }
+    }
+
+    /** Builds the task for a movie data type; unknown constants are rejected instead of yielding null. */
+    private Task taskByMovieDataType(MovieDataType movieDataType) throws DataTypeNotSupportedException {
+        switch (movieDataType) {
+            case TITLE: {
+                Task task = new Task();
+                // Without the data type fulfillImdbObject can never match the task to a Movie field.
+                task.setDataType(movieDataType);
+                // TODO: url and XPath pattern are not assigned here; the caller has to provide them.
+                return task;
+            }
+            default:
+                throw new DataTypeNotSupportedException(
+                        String.format("MovieDataType %s not supported by API v1_0!", movieDataType));
+        }
+    }
+
+    /** Writes the task result into the movie field selected by the task's data type. */
+    private void fulfillMovie(Movie movie, Task task) {
+        switch ((MovieDataType) task.getDataType()) {
+            case TITLE:
+                movie.setTitle(task.getResult());
+                break;
+            default:
+                // No movie field is mapped to this data type.
+                break;
+        }
+    }
+
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/extractor/HtmlExtractor.java b/core/src/main/java/ru/bvn13/imdbspider/spider/extractor/HtmlExtractor.java
new file mode 100644
index 0000000..d0a42a5
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/spider/extractor/HtmlExtractor.java
@@ -0,0 +1,107 @@
+package ru.bvn13.imdbspider.spider.extractor;
+
+import ru.bvn13.imdbspider.exceptions.extractor.HtmlExtractorException;
+import ru.bvn13.imdbspider.exceptions.extractor.MalformedUrlException;
+import ru.bvn13.imdbspider.exceptions.extractor.ConnectionEstablishingException;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.net.*;
+import java.nio.charset.StandardCharsets;
+import java.util.Map;
+
+/**
+ * Downloads the body of a page over HTTP(S) with a GET request.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class HtmlExtractor {
+
+    private static final String UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36";
+
+    /**
+     * Renders the map as a URL query string ({@code k1=v1&k2=v2}); keys and values are URL-encoded as UTF-8.
+     *
+     * @param params parameters to render
+     * @return the query string, empty for an empty map
+     * @throws UnsupportedEncodingException if UTF-8 is not supported
+     */
+    private static String getParamsString(Map<String, String> params) throws UnsupportedEncodingException {
+        StringBuilder result = new StringBuilder();
+
+        for (Map.Entry<String, String> entry : params.entrySet()) {
+            if (result.length() > 0) {
+                result.append("&");
+            }
+            result.append(URLEncoder.encode(entry.getKey(), "UTF-8"));
+            result.append("=");
+            result.append(URLEncoder.encode(entry.getValue(), "UTF-8"));
+        }
+
+        return result.toString();
+    }
+
+    /**
+     * Fetches the given url and returns the response body as text.
+     *
+     * <p>The body is decoded as UTF-8 and every line is terminated with {@code '\n'}.
+     *
+     * @param url absolute http or https url
+     * @return the response body
+     * @throws MalformedUrlException if {@code url} cannot be parsed
+     * @throws ConnectionEstablishingException if the url is not http(s), or the connection cannot be
+     *         opened or read
+     * @throws HtmlExtractorException declared supertype of the exceptions above
+     */
+    public String getHtml(String url) throws HtmlExtractorException {
+
+        final URL target;
+        try {
+            target = new URL(url);
+        } catch (MalformedURLException e) {
+            throw new MalformedUrlException(String.format("Wrong url: %s", url), e);
+        }
+
+        final HttpURLConnection connection;
+        try {
+            URLConnection raw = target.openConnection();
+            if (!(raw instanceof HttpURLConnection)) {
+                // e.g. file: or jar: urls -- the unchecked cast would fail with a ClassCastException.
+                throw new ConnectionEstablishingException(String.format("Not an http(s) url: %s", url));
+            }
+            connection = (HttpURLConnection) raw;
+        } catch (IOException e) {
+            throw new ConnectionEstablishingException(String.format("Unable to open connection by url: %s", url), e);
+        }
+
+        connection.setRequestProperty("Accept", "text/html");
+        // The UA constant was declared but never sent with the request.
+        connection.setRequestProperty("User-Agent", UA);
+
+        try {
+            connection.setRequestMethod("GET");
+        } catch (ProtocolException e) {
+            throw new ConnectionEstablishingException(String.format("Wrong protocol GET for url: %s", url), e);
+        }
+
+        // try-with-resources closes the reader; the charset is explicit instead of the platform default.
+        try (BufferedReader in = new BufferedReader(
+                new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
+            StringBuilder response = new StringBuilder();
+            String inputLine;
+            while ((inputLine = in.readLine()) != null) {
+                // readLine() strips the terminator; put it back so adjacent lines do not run together.
+                response.append(inputLine).append('\n');
+            }
+            return response.toString();
+        } catch (IOException e) {
+            throw new ConnectionEstablishingException(String.format("Could not get input stream for url: %s", url), e);
+        } finally {
+            connection.disconnect();
+        }
+
+    }
+
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/processor/HtmlProcessor.java b/core/src/main/java/ru/bvn13/imdbspider/spider/processor/HtmlProcessor.java
new file mode 100644
index 0000000..f8af553
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/spider/processor/HtmlProcessor.java
@@ -0,0 +1,92 @@
+package ru.bvn13.imdbspider.spider.processor;
+
+
+import org.w3c.dom.Document;
+import org.xml.sax.SAXException;
+import ru.bvn13.imdbspider.exceptions.processor.HtmlProcessorException;
+import ru.bvn13.imdbspider.exceptions.processor.HtmlToXmlConvertionException;
+import ru.bvn13.imdbspider.exceptions.processor.PatternEvaluationException;
+
+import javax.xml.XMLConstants;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
+import javax.xml.xpath.XPathExpressionException;
+import javax.xml.xpath.XPathFactory;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Parses markup as XML and evaluates an XPath expression against it.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class HtmlProcessor {
+
+    /**
+     * Evaluates {@code pattern} against {@code html} and returns the string value of the match.
+     *
+     * <p>The input is parsed with the JAXP XML parser, so it has to be well-formed XML.
+     *
+     * @param html markup to parse
+     * @param pattern XPath expression to evaluate
+     * @return the string result of the expression
+     * @throws HtmlToXmlConvertionException if {@code html} is null or cannot be parsed
+     * @throws PatternEvaluationException if {@code pattern} is null or cannot be evaluated
+     * @throws HtmlProcessorException if the XML parser cannot be configured
+     */
+    public String process(final String html, final String pattern) throws HtmlProcessorException {
+        // Report missing arguments through the declared exception types instead of a NullPointerException.
+        if (html == null) {
+            throw new HtmlToXmlConvertionException("Html is null");
+        }
+        if (pattern == null) {
+            throw new PatternEvaluationException("Pattern is null");
+        }
+
+        final Document xml = parse(html);
+
+        XPath xpath = XPathFactory.newInstance().newXPath();
+        try {
+            return (String) xpath.evaluate(pattern, xml, XPathConstants.STRING);
+        } catch (XPathExpressionException e) {
+            throw new PatternEvaluationException(String.format("Could not evaluate pattern: %s", pattern), e);
+        }
+    }
+
+    /** Parses the markup with a parser that never resolves external entities or external DTDs. */
+    private Document parse(String html) throws HtmlProcessorException {
+        final DocumentBuilder db;
+        try {
+            db = newHardenedFactory().newDocumentBuilder();
+        } catch (ParserConfigurationException e) {
+            throw new HtmlProcessorException(e);
+        }
+        try {
+            return db.parse(new ByteArrayInputStream(html.getBytes(StandardCharsets.UTF_8)));
+        } catch (SAXException e) {
+            throw new HtmlToXmlConvertionException("Html parsing exception", e);
+        } catch (IOException e) {
+            throw new HtmlToXmlConvertionException("Html reading exception", e);
+        }
+    }
+
+    /**
+     * Creates a factory hardened against XXE: the document comes from a remote site and is untrusted.
+     * DOCTYPE declarations stay allowed because HTML pages normally carry one.
+     */
+    private static DocumentBuilderFactory newHardenedFactory() throws ParserConfigurationException {
+        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+        dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+        dbf.setFeature("http://xml.org/sax/features/external-general-entities", false);
+        dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
+        dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
+        dbf.setXIncludeAware(false);
+        dbf.setExpandEntityReferences(false);
+        return dbf;
+    }
+
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Manager.java b/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Manager.java
new file mode 100644
index 0000000..ce498a9
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Manager.java
@@ -0,0 +1,82 @@
+package ru.bvn13.imdbspider.spider.tasker;
+
+import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.*;
+
+/**
+ * Groups tasks by url and runs one {@link Worker} per distinct url on a thread pool.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class Manager {
+
+    private final ExecutorService executor;
+
+    /** Creates a manager with its own cached thread pool. */
+    public Manager() {
+        this.executor = Executors.newCachedThreadPool();
+    }
+
+    /**
+     * Processes the tasks, fetching every distinct url once.
+     *
+     * <p>Failures do not abort the batch: a task without a url, and every task of a url whose
+     * worker failed, is returned with {@link Task#getException()} set.
+     *
+     * @param allTasks tasks to process
+     * @return the processed tasks: tasks without a url first, then the rest grouped by url
+     * @throws ExecutionException kept for source compatibility; worker failures are recorded on the tasks
+     * @throws InterruptedException if the calling thread is interrupted while waiting for a worker
+     */
+    public List<Task> processTasks(List<Task> allTasks) throws ExecutionException, InterruptedException {
+
+        List<Task> result = new ArrayList<>(allTasks.size());
+
+        // Insertion-ordered so the result order is deterministic.
+        Map<String, List<Task>> groupedTasks = new LinkedHashMap<>();
+        for (Task task : allTasks) {
+            if (task.getUrl() == null) {
+                // A null key used to blow up the ConcurrentHashMap lookup; report it on the task instead.
+                task.setException(new ImdbSpiderException("Task has no url"));
+                result.add(task);
+                continue;
+            }
+            groupedTasks.computeIfAbsent(task.getUrl(), key -> new ArrayList<>()).add(task);
+        }
+
+        // Submit everything first so the urls are fetched concurrently ...
+        Map<String, Future<List<Task>>> pending = new LinkedHashMap<>();
+        for (Map.Entry<String, List<Task>> group : groupedTasks.entrySet()) {
+            pending.put(group.getKey(), executor.submit(new Worker(group.getKey(), group.getValue())));
+        }
+
+        // ... then block on each future; get() waits without the former busy-wait loop.
+        for (Map.Entry<String, Future<List<Task>>> entry : pending.entrySet()) {
+            List<Task> group = groupedTasks.get(entry.getKey());
+            try {
+                result.addAll(entry.getValue().get());
+            } catch (ExecutionException e) {
+                for (Task task : group) {
+                    task.setException(new ImdbSpiderException(
+                            String.format("Could not process url: %s", entry.getKey()), e.getCause()));
+                }
+                result.addAll(group);
+            } catch (InterruptedException e) {
+                for (Future<List<Task>> future : pending.values()) {
+                    future.cancel(true);
+                }
+                throw e;
+            }
+        }
+
+        return result;
+
+    }
+
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Task.java b/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Task.java
new file mode 100644
index 0000000..a26ca32
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Task.java
@@ -0,0 +1,100 @@
+package ru.bvn13.imdbspider.spider.tasker;
+
+import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
+import ru.bvn13.imdbspider.imdb.DataType;
+
+/**
+ * One extraction job: an XPath pattern to evaluate on a url, plus the outcome (result or exception).
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class Task {
+
+    // What to do.
+    private String url;
+    private String xpathPattern;
+    private DataType dataType;
+
+    // What came out of it.
+    private String result;
+    private ImdbSpiderException exception;
+
+    /** Creates a task with every field unset. */
+    public Task() {
+        this(null, null, null);
+    }
+
+    /** @param xpathPattern pattern to evaluate */
+    public Task(String xpathPattern) {
+        this(null, xpathPattern, null);
+    }
+
+    /**
+     * @param url page to fetch
+     * @param xpathPattern pattern to evaluate
+     */
+    public Task(String url, String xpathPattern) {
+        this(url, xpathPattern, null);
+    }
+
+    /**
+     * @param url page to fetch
+     * @param xpathPattern pattern to evaluate
+     * @param dataType data type this task produces
+     */
+    public Task(String url, String xpathPattern, DataType dataType) {
+        this.url = url;
+        this.xpathPattern = xpathPattern;
+        this.dataType = dataType;
+    }
+
+    /** @return the url, or {@code null} if unset */
+    public String getUrl() {
+        return this.url;
+    }
+
+    /** @param url page to fetch */
+    public void setUrl(String url) {
+        this.url = url;
+    }
+
+    /** @return the XPath pattern, or {@code null} if unset */
+    public String getXpathPattern() {
+        return this.xpathPattern;
+    }
+
+    /** @param xpathPattern pattern to evaluate */
+    public void setXpathPattern(String xpathPattern) {
+        this.xpathPattern = xpathPattern;
+    }
+
+    /** @return the data type, or {@code null} if unset */
+    public DataType getDataType() {
+        return this.dataType;
+    }
+
+    /** @param dataType data type this task produces */
+    public void setDataType(DataType dataType) {
+        this.dataType = dataType;
+    }
+
+    /** @return the result, or {@code null} if none has been stored */
+    public String getResult() {
+        return this.result;
+    }
+
+    /** @param result result to store */
+    public void setResult(String result) {
+        this.result = result;
+    }
+
+    /** @return the recorded exception, or {@code null} if none has been stored */
+    public ImdbSpiderException getException() {
+        return this.exception;
+    }
+
+    /** @param exception exception to record */
+    public void setException(ImdbSpiderException exception) {
+        this.exception = exception;
+    }
+}
diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Worker.java b/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Worker.java
new file mode 100644
index 0000000..df0d6bf
--- /dev/null
+++ b/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Worker.java
@@ -0,0 +1,62 @@
+package ru.bvn13.imdbspider.spider.tasker;
+
+import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
+import ru.bvn13.imdbspider.exceptions.extractor.HtmlExtractorException;
+import ru.bvn13.imdbspider.spider.extractor.HtmlExtractor;
+import ru.bvn13.imdbspider.spider.processor.HtmlProcessor;
+
+import java.util.List;
+import java.util.concurrent.*;
+
+/**
+ * Fetches one url and evaluates the XPath pattern of every task bound to that url.
+ *
+ * @author boyko_vn at 09.01.2019
+ */
+public class Worker implements Callable<List<Task>> {
+
+    private final String url;
+    private final List<Task> tasks;
+
+    private final HtmlExtractor htmlExtractor;
+    private final HtmlProcessor htmlProcessor;
+
+    /**
+     * @param url page to fetch
+     * @param tasks tasks to evaluate against the fetched page
+     */
+    public Worker(String url, List<Task> tasks) {
+        this.url = url;
+        this.tasks = tasks;
+
+        this.htmlExtractor = new HtmlExtractor();
+        this.htmlProcessor = new HtmlProcessor();
+    }
+
+    /**
+     * Downloads the page, then fills in each task's result or exception.
+     *
+     * <p>Everything runs on the calling thread. The former per-worker thread pool was never shut
+     * down (leaking threads for every worker) and was only used to busy-wait on its futures.
+     *
+     * @return the list passed to the constructor, with results or exceptions set on its tasks
+     * @throws HtmlExtractorException if the page cannot be downloaded
+     */
+    @Override
+    public List<Task> call() throws Exception {
+        final String html = htmlExtractor.getHtml(url);
+
+        for (Task task : tasks) {
+            try {
+                task.setResult(htmlProcessor.process(html, task.getXpathPattern()));
+            } catch (ImdbSpiderException e) {
+                task.setException(e);
+            } catch (RuntimeException e) {
+                // Keep one broken task from failing the whole page.
+                task.setException(new ImdbSpiderException("Exception has been occurred", e));
+            }
+        }
+
+        return tasks;
+    }
+}
diff --git a/core/src/test/java/ru/bvn13/imdbspider/runner/AppTest.java b/core/src/test/java/ru/bvn13/imdbspider/runner/AppTest.java
new file mode 100644
index 0000000..0616c84
--- /dev/null
+++ b/core/src/test/java/ru/bvn13/imdbspider/runner/AppTest.java
@@ -0,0 +1,30 @@
+package ru.bvn13.imdbspider.runner;
+
+import static org.junit.Assert.assertTrue;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+import ru.bvn13.imdbspider.ImdbSpider;
+import ru.bvn13.imdbspider.imdb.Movie;
+
+import java.util.List;
+
+
+/**
+ * Smoke test for {@link ImdbSpider}; it makes no assertions about the search result yet.
+ */
+public class AppTest
+{
+    private static ImdbSpider spider;
+
+    @BeforeClass
+    public static void initClass() {
+        spider = ImdbSpider.withApi_1_0();
+    }
+
+    /** Runs a title search with a non-ASCII query, which exercises the URL encoding of the title. */
+    @Test
+    public void searchTerminatorTest() {
+        List<Movie> result = spider.searchMovieByTitle("Терминатор", 5);
+    }
+}
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..86856f6
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,108 @@
+
+
+
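+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>ru.bvn13.imdbspider</groupId>
+    <artifactId>parent</artifactId>
+    <version>1.0-SNAPSHOT</version>
+    <modules>
+        <module>core</module>
+        <module>runner</module>
+    </modules>
+
+    <name>IMDB-SPIDER :: PARENT</name>
+
+    <url>http://www.example.com</url>
+
+    <packaging>pom</packaging>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <java.version>11</java.version>
+        <maven.compiler.source>${java.version}</maven.compiler.source>
+        <maven.compiler.target>${java.version}</maven.compiler.target>
+    </properties>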
+
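+    <dependencies>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.11</version>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-core</artifactId>
+            <version>2.20.0</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>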
+
+
+
+
+
+
+
+ maven-clean-plugin
+ 3.1.0
+
+
+
+ maven-resources-plugin
+ 3.0.2
+
+
+ maven-compiler-plugin
+ 3.8.0
+
+ ${java.version}
+
+
+
+ org.ow2.asm
+ asm
+ 6.2
+
+
+
+
+ maven-surefire-plugin
+ 2.22.1
+
+
+ org.ow2.asm
+ asm
+ 6.2
+
+
+
+
+ maven-jar-plugin
+ 3.0.2
+
+
+ maven-install-plugin
+ 2.5.2
+
+
+ maven-deploy-plugin
+ 2.8.2
+
+
+
+ maven-site-plugin
+ 3.7.1
+
+
+ maven-project-info-reports-plugin
+ 3.0.0
+
+
+
+
+
+
diff --git a/runner/pom.xml b/runner/pom.xml
new file mode 100644
index 0000000..1aee796
--- /dev/null
+++ b/runner/pom.xml
@@ -0,0 +1,86 @@
+
+
+
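+    <parent>
+        <artifactId>parent</artifactId>
+        <groupId>ru.bvn13.imdbspider</groupId>
+        <version>1.0-SNAPSHOT</version>
+    </parent>
+
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>imdb-spider-runner</artifactId>
+
+    <name>IMDB-SPIDER :: RUNNER</name>
+
+    <packaging>jar</packaging>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    </properties>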
+
+
+
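+        <dependency>
+            <groupId>ru.bvn13.imdbspider</groupId>
+            <artifactId>imdb-spider-core</artifactId>
+            <version>1.0-SNAPSHOT</version>
+            <scope>compile</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.11</version>
+            <scope>test</scope>
+        </dependency>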
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/runner/src/main/java/module-info.java b/runner/src/main/java/module-info.java
new file mode 100644
index 0000000..a059167
--- /dev/null
+++ b/runner/src/main/java/module-info.java
@@ -0,0 +1,7 @@
+/**
+ * Module descriptor of the runner application; it depends on the spider core module.
+ */
+module imdb.spider.runner {
+    requires java.xml;
+    requires imdb.spider.core;
+}
\ No newline at end of file
diff --git a/runner/src/main/java/ru/bvn13/imdbspider/runner/App.java b/runner/src/main/java/ru/bvn13/imdbspider/runner/App.java
new file mode 100644
index 0000000..ffdfb8a
--- /dev/null
+++ b/runner/src/main/java/ru/bvn13/imdbspider/runner/App.java
@@ -0,0 +1,19 @@
+package ru.bvn13.imdbspider.runner;
+
+/**
+ * Placeholder entry point of the runner module.
+ */
+public class App
+{
+    private static final String GREETING = "Hello World!";
+
+    /**
+     * Prints the greeting to standard output.
+     *
+     * @param args command-line arguments; ignored
+     */
+    public static void main( String[] args )
+    {
+        System.out.println( GREETING );
+    }
+}
diff --git a/runner/src/test/java/ru/bvn13/imdbspider/runner/AppTest.java b/runner/src/test/java/ru/bvn13/imdbspider/runner/AppTest.java
new file mode 100644
index 0000000..7884387
--- /dev/null
+++ b/runner/src/test/java/ru/bvn13/imdbspider/runner/AppTest.java
@@ -0,0 +1,19 @@
+package ru.bvn13.imdbspider.runner;
+
+import static org.junit.Assert.assertTrue;
+
+import org.junit.Test;
+
+/**
+ * Placeholder test; it does not exercise any runner code.
+ */
+public class AppTest
+{
+    /** Trivial check that always passes. */
+    @Test
+    public void shouldAnswerWithTrue()
+    {
+        final boolean expected = true;
+        assertTrue( expected );
+    }
+}