diff --git a/core/src/main/java/ru/bvn13/imdbspider/ImdbSpider.java b/core/src/main/java/ru/bvn13/imdbspider/ImdbSpider.java index 3454ba0..66d7989 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/ImdbSpider.java +++ b/core/src/main/java/ru/bvn13/imdbspider/ImdbSpider.java @@ -20,6 +20,8 @@ import java.util.*; */ public class ImdbSpider { + private boolean isDebug; + private Manager manager; private ApiFactory apiFactory; @@ -38,6 +40,17 @@ public class ImdbSpider { manager = new Manager(); } + public boolean isDebug() { + return isDebug; + } + + public ImdbSpider setDebug(boolean debug) { + isDebug = debug; + manager.setDebug(isDebug); + apiFactory.setDebug(isDebug); + return this; + } + public ImdbSpider addHttpRequestHeader(String key, String value) { manager.addHttpRequestHeader(key, value); return this; diff --git a/core/src/main/java/ru/bvn13/imdbspider/imdb/ImdbObject.java b/core/src/main/java/ru/bvn13/imdbspider/imdb/ImdbObject.java index ff644cd..459ef93 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/imdb/ImdbObject.java +++ b/core/src/main/java/ru/bvn13/imdbspider/imdb/ImdbObject.java @@ -7,6 +7,8 @@ import java.util.EnumSet; */ public abstract class ImdbObject
& DataType> { + protected String html; + protected EnumSet
retrievedDataTypes; protected String id; @@ -16,6 +18,14 @@ public abstract class ImdbObject
& DataType> { this.initRetrievedDataTypes(); } + public String getHtml() { + return html; + } + + public void setHtml(String html) { + this.html = html; + } + protected abstract void initRetrievedDataTypes(); public boolean isDataTypeRetrieved(DT dataType) { diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/ApiFactory.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/ApiFactory.java index 887931b..ae10650 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/ApiFactory.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/ApiFactory.java @@ -16,6 +16,8 @@ import java.util.List; */ public interface ApiFactory { + void setDebug(boolean debug); + List createTasksForSearchMovieByTitle(String title, int maxCount, EnumSet dataTypes) throws ImdbSpiderException; Task taskByDataType(DataType dataType) throws DataTypeNotSupportedException; diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AbstractApiProcessor_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AbstractApiProcessor_1_0.java index 3189626..8b66462 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AbstractApiProcessor_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AbstractApiProcessor_1_0.java @@ -5,6 +5,8 @@ package ru.bvn13.imdbspider.spider.api.v1_0; */ abstract public class AbstractApiProcessor_1_0 { + protected boolean isDebug; + private ApiFactory_1_0 apiFactory; public AbstractApiProcessor_1_0(ApiFactory_1_0 apiFactory) { @@ -14,4 +16,8 @@ abstract public class AbstractApiProcessor_1_0 { public ApiFactory_1_0 getApiFactory() { return apiFactory; } + + public void setDebug(boolean debug) { + isDebug = debug; + } } diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java index 6e0cd49..b759f15 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java @@ -25,6 +25,8 @@ import java.util.regex.Pattern; */ public class ApiFactory_1_0 implements ApiFactory { + private boolean isDebug; + static final String URL_MAIN = "https://www.imdb.com"; private static final String URL_SEARCH_TITLE = "https://www.imdb.com/find?ref_=nv_sr_fn&q={{title}}&s=tt"; @@ -128,6 +130,12 @@ public class ApiFactory_1_0 implements ApiFactory { this.taglineProcessor = new TaglineProcessor_1_0(this); } + @Override + public void setDebug(boolean debug) { + isDebug = debug; + this.movieProcessor.setDebug(isDebug); + } + @Override public List createTasksForSearchMovieByTitle(String title, int maxCount, EnumSet dataTypes) throws ImdbSpiderException { diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieProcessor_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieProcessor_1_0.java index 6a6fa07..40e5c5d 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieProcessor_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieProcessor_1_0.java @@ -191,6 +191,9 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 { boolean isDone = false; switch ((MovieDataType) task.getDataType()) { case ID: + if (isDebug) { + movie.setHtml(task.getHtml()); + } movie.setUrl(task.getUrl()); movie.setId((String) task.getResult()); isDone = true; diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Manager.java b/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Manager.java index 97c7329..88fb8e0 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Manager.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Manager.java @@ -13,8 +13,18 @@ import java.util.concurrent.*; */ public class Manager { + private boolean isDebug; + private Map httpRequestHeaders = new HashMap<>(); + public boolean isDebug() { + return isDebug; + } + + public void setDebug(boolean debug) { + isDebug = debug; + } + public void addHttpRequestHeader(String key, String value) { this.httpRequestHeaders.put(key, value); } @@ -38,6 +48,7 @@ public class Manager { groupedTasks.entrySet().parallelStream().forEach(stringListEntry -> { Worker w = new Worker(stringListEntry.getKey(), stringListEntry.getValue()); + w.setDebug(isDebug); try { w.run(httpRequestHeaders); } catch (HtmlExtractorException e) { diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Task.java b/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Task.java index d9ebeec..4e74e4b 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Task.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Task.java @@ -14,6 +14,7 @@ import java.util.function.BiConsumer; public class Task { private String url; + private String html; private DataType dataType; private String cssSelector; @@ -58,6 +59,14 @@ public class Task { return this; } + public String getHtml() { + return html; + } + + public void setHtml(String html) { + this.html = html; + } + public String getCssSelector() { return cssSelector; } diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Worker.java b/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Worker.java index fbdb32d..dbd09d9 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Worker.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Worker.java @@ -7,16 +7,16 @@ import ru.bvn13.imdbspider.spider.extractor.HtmlExtractor; import ru.bvn13.imdbspider.spider.processor.HtmlProcessor; import ru.bvn13.imdbspider.spider.processor.JsoupHtmlProcessor; -import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.concurrent.*; /** * @author boyko_vn at 09.01.2019 */ public class Worker { + private boolean isDebug; + private final String url; private final List tasks; @@ -31,12 +31,24 @@ public class Worker { this.htmlProcessor = new JsoupHtmlProcessor(); } + public boolean isDebug() { + return isDebug; + } + + public void setDebug(boolean debug) { + isDebug = debug; + } + public Boolean run(Map httpRequestHeaders) throws HtmlExtractorException { final String html = htmlExtractor.getHtml(url, httpRequestHeaders); tasks.parallelStream().forEach(task -> { + if (isDebug) { + task.setHtml(html); + } + try { if (task.getCssSelector() != null && !task.getCssSelector().isEmpty()) { task.setCssSelectorResult(htmlProcessor.process(html, task.getCssSelector())); diff --git a/core/src/test/java/ru/bvn13/imdbspider/runner/MovieSearchTest.java b/core/src/test/java/ru/bvn13/imdbspider/runner/MovieSearchTest.java index 9b42dbb..60b2e6d 100644 --- a/core/src/test/java/ru/bvn13/imdbspider/runner/MovieSearchTest.java +++ b/core/src/test/java/ru/bvn13/imdbspider/runner/MovieSearchTest.java @@ -31,6 +31,7 @@ public class MovieSearchTest { @BeforeClass public static void initClass() { spider = ImdbSpider.withApi_1_0() + .setDebug(true) .addHttpRequestHeader("Content-Language", "en-EN"); } @@ -70,6 +71,8 @@ public class MovieSearchTest { System.out.println("=========================================="); System.out.println("MOVIE: \n"+json); System.out.println("=========================================="); + System.out.println("HTML: \n"+movie.getHtml()); + System.out.println("=========================================="); assertTrue("Expected ID field presence", movie.isDataTypeRetrieved(MovieDataType.ID)); assertTrue("Expected TITLE field presence", movie.isDataTypeRetrieved(MovieDataType.TITLE)); @@ -93,7 +96,7 @@ public class MovieSearchTest { assertTrue("Expected TAGLINES field presence", movie.isDataTypeRetrieved(MovieDataType.TAGLINES)); assertEquals("Expected that first in search result has ID = 0088247, but given: "+movie.getId(), "0088247", movie.getId()); - assertEquals("Expected movie name: The Terminator, but given: "+movie.getOriginalTitle(), "The Terminator", movie.getOriginalTitle()); + assertEquals("Expected original movie name: The Terminator, but given: "+movie.getOriginalTitle(), "The Terminator", movie.getOriginalTitle()); assertEquals("Expected year = "+movie.getYear(), Integer.valueOf(1984), movie.getYear()); assertEquals("Invalid storyline", TERMINATOR_STORYLINE, movie.getStoryline());