From 347e3b1e96c891c4a84e7e6a80c1995742401cde Mon Sep 17 00:00:00 2001 From: Vyacheslav Boyko Date: Thu, 17 Jan 2019 14:30:01 +0300 Subject: [PATCH] implemented retrieving akas --- LICENSE | 2 +- .../java/ru/bvn13/imdbspider/imdb/Aka.java | 9 +++ .../ru/bvn13/imdbspider/imdb/AkaDataType.java | 1 + .../ru/bvn13/imdbspider/imdb/AkaList.java | 4 ++ .../java/ru/bvn13/imdbspider/imdb/Movie.java | 13 ++-- .../api/v1_0/AbstractApiProcessor_1_0.java | 14 +++- .../spider/api/v1_0/AkaListProcessor_1_0.java | 45 +++++++++++-- .../spider/api/v1_0/AkaProcessor_1_0.java | 66 ++++++++++++++++++- .../spider/api/v1_0/ApiFactory_1_0.java | 1 + .../api/v1_0/MovieListProcessor_1_0.java | 7 +- .../spider/api/v1_0/MovieProcessor_1_0.java | 26 ++++++-- .../api/v1_0/TaglineListProcessor_1_0.java | 7 +- .../spider/api/v1_0/TaglineProcessor_1_0.java | 3 +- .../spider/composer/AkaComposer.java | 26 ++++++++ .../spider/composer/AkaListComposer.java | 36 ++++++++++ .../composer/ImdbObjectComposerFactory.java | 13 ++++ .../spider/composer/MovieComposer.java | 6 ++ .../bvn13/imdbspider/spider/tasker/Task.java | 39 ++++++++++- .../imdbspider/runner/MovieSearchTest.java | 21 ++++-- 19 files changed, 301 insertions(+), 38 deletions(-) create mode 100644 core/src/main/java/ru/bvn13/imdbspider/spider/composer/AkaComposer.java create mode 100644 core/src/main/java/ru/bvn13/imdbspider/spider/composer/AkaListComposer.java diff --git a/LICENSE b/LICENSE index 7a97d39..5bb512d 100644 --- a/LICENSE +++ b/LICENSE @@ -182,7 +182,7 @@ replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the + file or class name and name of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. diff --git a/core/src/main/java/ru/bvn13/imdbspider/imdb/Aka.java b/core/src/main/java/ru/bvn13/imdbspider/imdb/Aka.java index e1d1249..78ad28d 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/imdb/Aka.java +++ b/core/src/main/java/ru/bvn13/imdbspider/imdb/Aka.java @@ -7,6 +7,7 @@ import java.util.EnumSet; */ public class Aka extends ImdbObject { + private String name; private String title; @Override @@ -21,4 +22,12 @@ public class Aka extends ImdbObject { public void setTitle(String title) { this.title = title; } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } } diff --git a/core/src/main/java/ru/bvn13/imdbspider/imdb/AkaDataType.java b/core/src/main/java/ru/bvn13/imdbspider/imdb/AkaDataType.java index 0efe568..ae2e9e3 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/imdb/AkaDataType.java +++ b/core/src/main/java/ru/bvn13/imdbspider/imdb/AkaDataType.java @@ -6,6 +6,7 @@ package ru.bvn13.imdbspider.imdb; public enum AkaDataType implements DataType { ID("id"), + NAME("name"), TITLE("title") ; diff --git a/core/src/main/java/ru/bvn13/imdbspider/imdb/AkaList.java b/core/src/main/java/ru/bvn13/imdbspider/imdb/AkaList.java index b65327e..7e7f51c 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/imdb/AkaList.java +++ b/core/src/main/java/ru/bvn13/imdbspider/imdb/AkaList.java @@ -1,5 +1,6 @@ package ru.bvn13.imdbspider.imdb; +import java.util.ArrayList; import java.util.EnumSet; import java.util.List; @@ -16,6 +17,9 @@ public class AkaList extends ImdbObject { } public List getAkas() { + if (akas == null) { + akas = new ArrayList<>(); + } return akas; } diff --git a/core/src/main/java/ru/bvn13/imdbspider/imdb/Movie.java b/core/src/main/java/ru/bvn13/imdbspider/imdb/Movie.java index 497ac43..eea2b9c 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/imdb/Movie.java +++ b/core/src/main/java/ru/bvn13/imdbspider/imdb/Movie.java @@ -5,8 +5,6 @@ import ru.bvn13.imdbspider.imdb.accessories.SoundMix; import java.util.EnumSet; import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; /** * @author boyko_vn at 09.01.2019 @@ -33,7 +31,7 @@ public class Movie extends ImdbObject { private String aspectRatio; private TaglineList taglineList; - private Map akas = new ConcurrentHashMap<>(50); + private AkaList akaList; @Override protected void initRetrievedDataTypes() { @@ -192,12 +190,11 @@ public class Movie extends ImdbObject { this.taglineList = taglineList; } - public Map getAkas() { - return akas; + public AkaList getAkaList() { + return akaList; } - public void setAkas(Map akas) { - this.akas = akas; + public void setAkaList(AkaList akaList) { + this.akaList = akaList; } - } diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AbstractApiProcessor_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AbstractApiProcessor_1_0.java index 56b289a..958e757 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AbstractApiProcessor_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AbstractApiProcessor_1_0.java @@ -27,5 +27,17 @@ abstract public class AbstractApiProcessor_1_0 { + nestedTask.setImdbObjectParentId(parentId); + }); + } + } } diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AkaListProcessor_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AkaListProcessor_1_0.java index 01141b4..e8ccefd 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AkaListProcessor_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AkaListProcessor_1_0.java @@ -1,5 +1,7 @@ package ru.bvn13.imdbspider.spider.api.v1_0; +import org.jsoup.nodes.Element; +import ru.bvn13.imdbspider.imdb.AkaDataType; import ru.bvn13.imdbspider.imdb.AkaList; import ru.bvn13.imdbspider.imdb.AkaListDataType; import ru.bvn13.imdbspider.spider.tasker.Task; @@ -14,13 +16,48 @@ public class AkaListProcessor_1_0 extends AbstractApiProcessor_1_0 table.ipl-zebra-list.akas-table-test-only > tbody > tr"); + t.setPostprocess((task, s) -> { + int i = 0; + for (Element element : task.getCssSelectorResult()) { + Task akaTask = getApiFactory().getAkaProcessor().taskByDataType(AkaDataType.ID, task.getImdbObjectParentId()) + .setUrl(task.getUrl()) + .setResultType(String.class) + .setResult(String.format("%d", (i++))) + .setParentTask(task); + task.getNestedTasks().add(akaTask); + + for (AkaDataType value : AkaDataType.values()) { + if (!value.equals(AkaDataType.ID)) { + Task newTask = getApiFactory().getAkaProcessor().taskByDataType(value, task.getImdbObjectParentId()) + .setSourceType(Task.SOURCE_TYPE.HTML) + .setUrl(task.getUrl()) + .setSourceHtml(element.html()) + .setParentTask(akaTask); + akaTask.getNestedTasks().add(newTask); + } + } + } + }); + break; + } + return t; } @Override - public void fillUpImdbObject(AkaList imdbObject, Task task) { - + public void fillUpImdbObject(AkaList akaList, Task task) { + switch ((AkaListDataType) task.getDataType()) { + case ELEMENTS: + akaList.setUrl(task.getUrl()); + akaList.getRetrievedDataTypes().add((AkaListDataType) task.getDataType()); + break; + } } } diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AkaProcessor_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AkaProcessor_1_0.java index d7550b1..c2965fb 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AkaProcessor_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AkaProcessor_1_0.java @@ -1,5 +1,8 @@ package ru.bvn13.imdbspider.spider.api.v1_0; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import ru.bvn13.imdbspider.exceptions.processor.HtmlProcessorException; import ru.bvn13.imdbspider.imdb.Aka; import ru.bvn13.imdbspider.imdb.AkaDataType; import ru.bvn13.imdbspider.spider.tasker.Task; @@ -13,12 +16,69 @@ public class AkaProcessor_1_0 extends AbstractApiProcessor_1_0 } @Override - Task taskByDataType(AkaDataType dataType) { - return null; + Task taskByDataType(AkaDataType dataType, String imdbObjectParentId) { + Task t = new Task(); + t.setImdbObjectParentId(imdbObjectParentId); + t.setDataType(dataType); + switch (dataType) { + case ID: + // + break; + case NAME: + t.setPostprocess((task, s) -> { + task.setResultType(String.class); + task.setResult(""); + try { + Elements els = getApiFactory().getHtmlProcessor().process(String.format("%s
", task.getSourceHtml()), "td.aka-item__name"); + if (els.size() > 0) { + Element name = els.first(); + task.setResult(name.text()); + } + } catch (HtmlProcessorException e) { + e.printStackTrace(); + } + }); + break; + case TITLE: + t.setPostprocess((task, s) -> { + task.setResultType(String.class); + task.setResult(""); + try { + Elements els = getApiFactory().getHtmlProcessor().process(String.format("%s
", task.getSourceHtml()), "td.aka-item__title"); + if (els.size() > 0) { + Element title = els.first(); + task.setResult(title.text()); + } + } catch (HtmlProcessorException e) { + e.printStackTrace(); + } + }); + break; + } + return t; } @Override - void fillUpImdbObject(Aka imdbObject, Task task) { + void fillUpImdbObject(Aka aka, Task task) { + boolean isDone = false; + switch ((AkaDataType) task.getDataType()) { + case ID: + aka.setId((String) task.getResult()); + aka.setUrl(task.getUrl()); + isDone = true; + break; + case NAME: + aka.setName((String) task.getResult()); + isDone = true; + break; + case TITLE: + aka.setTitle((String) task.getResult()); + isDone = true; + break; + } + if (isDone) { + aka.getRetrievedDataTypes().add((AkaDataType) task.getDataType()); + } } } diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java index bf4af1b..a468157 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java @@ -30,6 +30,7 @@ public class ApiFactory_1_0 implements ApiFactory { static final String URL_MAIN = "https://www.imdb.com"; private static final String URL_SEARCH_TITLE = "https://www.imdb.com/find?ref_=nv_sr_fn&q={{title}}&s=tt"; + static final String URL_AKAS = "https://www.imdb.com/title/tt{{movie_id}}/releaseinfo"; static final Pattern PATTERN_MOVIE_ID_FROM_MOVIELIST = Pattern.compile("/title/tt(\\d+)/.*"); diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieListProcessor_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieListProcessor_1_0.java index fc63370..feb01ed 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieListProcessor_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieListProcessor_1_0.java @@ -18,8 +18,9 @@ public class MovieListProcessor_1_0 extends AbstractApiProcessor_1_0 - movieTask.getNestedTasks().add(getApiFactory().getMovieProcessor().taskByDataType(movieDataType) + movieTask.getNestedTasks().add(getApiFactory().getMovieProcessor().taskByDataType(movieDataType, task.getImdbObjectParentId()) .setParentTask(movieTask) .setUrl(String.format("%s%s", ApiFactory_1_0.URL_MAIN, link.attr("href"))))); } diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieProcessor_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieProcessor_1_0.java index 6e197d9..f674658 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieProcessor_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieProcessor_1_0.java @@ -3,9 +3,7 @@ package ru.bvn13.imdbspider.spider.api.v1_0; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import ru.bvn13.imdbspider.exceptions.processor.HtmlProcessorException; -import ru.bvn13.imdbspider.imdb.Movie; -import ru.bvn13.imdbspider.imdb.MovieDataType; -import ru.bvn13.imdbspider.imdb.TaglineListDataType; +import ru.bvn13.imdbspider.imdb.*; import ru.bvn13.imdbspider.imdb.accessories.Link; import ru.bvn13.imdbspider.imdb.accessories.SoundMix; import ru.bvn13.imdbspider.spider.tasker.Task; @@ -24,8 +22,9 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 0) { - task.setResult(task.getCssSelectorResult().first().text()); + task.setResult(task.getCssSelectorResult().first().ownText()); } else { try { Elements titles = getApiFactory().getHtmlProcessor().process(s, "#title-overview-widget > div.vital > div.title_block > div > div.titleBar > div.title_wrapper > h1"); // like title @@ -190,7 +190,7 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 0) { Elements links = task.getCssSelectorResult().first().parent().select("span > a:contains(See more)"); if (links.size() > 0) { - Task newTask = getApiFactory().getTaglineListProcessor().taskByDataType(TaglineListDataType.ELEMENTS) + Task newTask = getApiFactory().getTaglineListProcessor().taskByDataType(TaglineListDataType.ELEMENTS, task.getImdbObjectParentId()) .setParentTask(task) .setUrl(String.format("%s%s", ApiFactory_1_0.URL_MAIN, links.first().attr("href"))); task.getNestedTasks().add(newTask); @@ -198,6 +198,14 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 { + Task akasTask = getApiFactory().getAkaListProcessor().taskByDataType(AkaListDataType.ELEMENTS, task.getImdbObjectParentId()) + .setParentTask(task) + .setUrl(ApiFactory_1_0.URL_AKAS.replace("{{movie_id}}", t.getImdbObjectParentId())); + task.getNestedTasks().add(akasTask); + }); + break; } return t; } @@ -289,6 +297,12 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 { for (Element element : task.getCssSelectorResult()) { - Task newTaskId = getApiFactory().getTaglineProcessor().taskByDataType(TaglineDataType.ID) + Task newTaskId = getApiFactory().getTaglineProcessor().taskByDataType(TaglineDataType.ID, task.getImdbObjectParentId()) .setParentTask(task) .setUrl(task.getUrl()) .setResult(String.format("%d", i.getAndAdd(1))); task.getNestedTasks().add(newTaskId); - Task newTaskText = getApiFactory().getTaglineProcessor().taskByDataType(TaglineDataType.TEXT) + Task newTaskText = getApiFactory().getTaglineProcessor().taskByDataType(TaglineDataType.TEXT, task.getImdbObjectParentId()) .setParentTask(task) .setUrl(task.getUrl()) .setResult(element.text()); diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/TaglineProcessor_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/TaglineProcessor_1_0.java index f9f4730..94efb2b 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/TaglineProcessor_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/TaglineProcessor_1_0.java @@ -14,8 +14,9 @@ public class TaglineProcessor_1_0 extends AbstractApiProcessor_1_0 { + public AkaComposer(ApiFactory apiFactory, ImdbObjectComposerFactory imdbObjectComposerFactory) throws ComposerNotFoundException { + super(apiFactory, imdbObjectComposerFactory); + } + + @Override + public Aka compose(Task task) throws ImdbSpiderException { + Aka aka = new Aka(); + this.apiFactory.fillUpImdbObject(aka, task); + for (Task nestedTask : task.getNestedTasks()) { + this.apiFactory.fillUpImdbObject(aka, nestedTask); + } + return aka; + } +} diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/composer/AkaListComposer.java b/core/src/main/java/ru/bvn13/imdbspider/spider/composer/AkaListComposer.java new file mode 100644 index 0000000..2fbd173 --- /dev/null +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/composer/AkaListComposer.java @@ -0,0 +1,36 @@ +package ru.bvn13.imdbspider.spider.composer; + +import ru.bvn13.imdbspider.exceptions.ImdbSpiderException; +import ru.bvn13.imdbspider.exceptions.composer.ComposerNotFoundException; +import ru.bvn13.imdbspider.imdb.Aka; +import ru.bvn13.imdbspider.imdb.AkaList; +import ru.bvn13.imdbspider.spider.api.ApiFactory; +import ru.bvn13.imdbspider.spider.tasker.Task; + +/** + * @author boyko_vn at 17.01.2019 + */ +public class AkaListComposer extends AbstractImdbObjectComposer implements ImdbObjectComposer { + + private AkaComposer akaComposer; + + public AkaListComposer(ApiFactory apiFactory, ImdbObjectComposerFactory imdbObjectComposerFactory) throws ComposerNotFoundException { + super(apiFactory, imdbObjectComposerFactory); + + akaComposer = (AkaComposer) this.imdbObjectComposerFactory.getComposer(Aka.class); + } + + @Override + public AkaList compose(Task task) throws ImdbSpiderException { + AkaList akaList = new AkaList(); + if (task.getNestedTasks().size() > 0) { + Task akaListTask = task.getNestedTasks().get(0); + + this.apiFactory.fillUpImdbObject(akaList, task); + for (Task nestedTask : akaListTask.getNestedTasks()) { + akaList.getAkas().add(akaComposer.compose(nestedTask)); + } + } + return akaList; + } +} diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/composer/ImdbObjectComposerFactory.java b/core/src/main/java/ru/bvn13/imdbspider/spider/composer/ImdbObjectComposerFactory.java index bb3770c..4a94970 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/composer/ImdbObjectComposerFactory.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/composer/ImdbObjectComposerFactory.java @@ -19,6 +19,9 @@ public class ImdbObjectComposerFactory { private MovieComposer movieComposer; private TaglineListComposer taglineListComposer; private TaglineComposer taglineComposer; + private AkaListComposer akaListComposer; + private AkaComposer akaComposer; + public ImdbObjectComposer getComposer(Class clazz) throws ComposerNotFoundException { if (clazz.isAssignableFrom(MovieList.class)) { @@ -41,6 +44,16 @@ public class ImdbObjectComposerFactory { taglineComposer = new TaglineComposer(apiFactory, this); } return taglineComposer; + } if (clazz.isAssignableFrom(AkaList.class)) { + if (akaListComposer == null) { + akaListComposer = new AkaListComposer(apiFactory, this); + } + return akaListComposer; + } if (clazz.isAssignableFrom(Aka.class)) { + if (akaComposer == null) { + akaComposer = new AkaComposer(apiFactory, this); + } + return akaComposer; } throw new ComposerNotFoundException(String.format("Composer not found: %s", clazz.getName())); diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/composer/MovieComposer.java b/core/src/main/java/ru/bvn13/imdbspider/spider/composer/MovieComposer.java index 55eaeda..dc1828c 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/composer/MovieComposer.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/composer/MovieComposer.java @@ -2,6 +2,7 @@ package ru.bvn13.imdbspider.spider.composer; import ru.bvn13.imdbspider.exceptions.ImdbSpiderException; import ru.bvn13.imdbspider.exceptions.composer.ComposerNotFoundException; +import ru.bvn13.imdbspider.imdb.AkaList; import ru.bvn13.imdbspider.imdb.Movie; import ru.bvn13.imdbspider.imdb.MovieDataType; import ru.bvn13.imdbspider.imdb.TaglineList; @@ -14,11 +15,13 @@ import ru.bvn13.imdbspider.spider.tasker.Task; public class MovieComposer extends AbstractImdbObjectComposer implements ImdbObjectComposer { private TaglineListComposer taglineListComposer; + private AkaListComposer akaListComposer; public MovieComposer(ApiFactory apiFactory, ImdbObjectComposerFactory imdbObjectComposerFactory) throws ComposerNotFoundException { super(apiFactory, imdbObjectComposerFactory); taglineListComposer = (TaglineListComposer) this.imdbObjectComposerFactory.getComposer(TaglineList.class); + akaListComposer = (AkaListComposer) this.imdbObjectComposerFactory.getComposer(AkaList.class); } @Override @@ -30,6 +33,9 @@ public class MovieComposer extends AbstractImdbObjectComposer implements ImdbObj if (nestedTask.getDataType().equals(MovieDataType.TAGLINES)) { movie.setTaglineList(taglineListComposer.compose(nestedTask)); } + if (nestedTask.getDataType().equals(MovieDataType.AKAS)) { + movie.setAkaList(akaListComposer.compose(nestedTask)); + } } return movie; } diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Task.java b/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Task.java index 4e74e4b..94a1e07 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Task.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/tasker/Task.java @@ -13,9 +13,19 @@ import java.util.function.BiConsumer; */ public class Task { + private String imdbObjectParentId; private String url; private String html; + public enum SOURCE_TYPE { + URL, + HTML + } + + private SOURCE_TYPE sourceType = SOURCE_TYPE.URL; + + private String sourceHtml; + private DataType dataType; private String cssSelector; private Elements cssSelectorResult; @@ -50,6 +60,32 @@ public class Task { this.dataType = dataType; } + public String getImdbObjectParentId() { + return imdbObjectParentId; + } + + public void setImdbObjectParentId(String imdbObjectParentId) { + this.imdbObjectParentId = imdbObjectParentId; + } + + public SOURCE_TYPE getSourceType() { + return sourceType; + } + + public Task setSourceType(SOURCE_TYPE sourceType) { + this.sourceType = sourceType; + return this; + } + + public String getSourceHtml() { + return sourceHtml; + } + + public Task setSourceHtml(String sourceHtml) { + this.sourceHtml = sourceHtml; + return this; + } + public String getUrl() { return url; } @@ -63,8 +99,9 @@ public class Task { return html; } - public void setHtml(String html) { + public Task setHtml(String html) { this.html = html; + return this; } public String getCssSelector() { diff --git a/core/src/test/java/ru/bvn13/imdbspider/runner/MovieSearchTest.java b/core/src/test/java/ru/bvn13/imdbspider/runner/MovieSearchTest.java index 9637e36..3005cb4 100644 --- a/core/src/test/java/ru/bvn13/imdbspider/runner/MovieSearchTest.java +++ b/core/src/test/java/ru/bvn13/imdbspider/runner/MovieSearchTest.java @@ -57,7 +57,8 @@ public class MovieSearchTest { MovieDataType.COLOR, MovieDataType.ASPECT_RATIO, MovieDataType.POSTER, - MovieDataType.TAGLINES + MovieDataType.TAGLINES, + MovieDataType.AKAS ); @@ -94,28 +95,30 @@ public class MovieSearchTest { assertTrue("Expected ASPECT_RATIO field presence", movie.isDataTypeRetrieved(MovieDataType.ASPECT_RATIO)); assertTrue("Expected POSTER field presence", movie.isDataTypeRetrieved(MovieDataType.POSTER)); assertTrue("Expected TAGLINES field presence", movie.isDataTypeRetrieved(MovieDataType.TAGLINES)); + assertTrue("Expected AKAS field presence", movie.isDataTypeRetrieved(MovieDataType.AKAS)); assertEquals("Expected that first in search result has ID = 0088247, but given: "+movie.getId(), "0088247", movie.getId()); assertEquals("Expected original movie name: The Terminator, but given: "+movie.getOriginalTitle(), "The Terminator", movie.getOriginalTitle()); + //assertTrue("Expected original movie name starts with: The Terminator, but given: "+movie.getOriginalTitle(), movie.getOriginalTitle().startsWith("The Terminator")); assertEquals("Expected year = "+movie.getYear(), Integer.valueOf(1984), movie.getYear()); assertEquals("Invalid storyline", TERMINATOR_STORYLINE, movie.getStoryline()); //assertEquals(TERMINATOR_TAGLINES, movie.getRandomTagline()); - assertTrue("Expected that genre Action is present", movie.getGenres().contains("Action")); - assertTrue("Expected that genre Sci-Fi is present", movie.getGenres().contains("Sci-Fi")); + assertTrue("Expected that genre Action was present", movie.getGenres().contains("Action")); + assertTrue("Expected that genre Sci-Fi was present", movie.getGenres().contains("Sci-Fi")); //assertEquals("Expected that certificate is 16+ but given: "+movie.getCertificate(), "16+", movie.getCertificate()); //cannot be blocker - it depends on locale, which IMDB determines on geo-location (it seems) //assertTrue(movie.getOfficialSites().contains("Facebook")); - assertTrue("Expected than at least one site is present", movie.getOfficialSites().size() > 0); + assertTrue("Expected than at least one site was present", movie.getOfficialSites().size() > 0); assertEquals("Expected that first site is Facebook", "Facebook", movie.getOfficialSites().get(0).getTitle()); //assertEquals(TERMINATOR_FACEBOOK_URL, movie.getOfficialSites().get(0).getUrl()); //not comparable, dynamic link - assertTrue("Expected at least one country is present", movie.getCountries().size() > 0); + assertTrue("Expected at least one country was present", movie.getCountries().size() > 0); assertTrue("Expected country UK", movie.getCountries().contains("UK")); assertTrue("Expected country USA", movie.getCountries().contains("USA")); - assertTrue("Expected at least one language is present", movie.getLanguages().size() > 0); + assertTrue("Expected at least one language was present", movie.getLanguages().size() > 0); assertTrue("Expected language English", movie.getLanguages().contains("English")); assertTrue("Expected language Spanish", movie.getLanguages().contains("Spanish")); @@ -151,7 +154,7 @@ public class MovieSearchTest { // taglines - assertNotNull("Expected that tagline list presence", movie.getTaglineList()); + assertNotNull("Expected that tagline list was present", movie.getTaglineList()); assertEquals("Expected 5 taglines but given: "+movie.getTaglineList().getTaglines().size(), 5, movie.getTaglineList().getTaglines().size()); boolean hasTagline1 = false, hasTagline2 = false, hasTagline3 = false, hasTagline4 = false, hasTagline5 = false; @@ -170,5 +173,9 @@ public class MovieSearchTest { assertTrue("Expected Tagline 3 presence", hasTagline3); assertTrue("Expected Tagline 4 presence", hasTagline4); assertTrue("Expected Tagline 5 presence", hasTagline5); + + // akas + assertNotNull("Expected that akas list was present", movie.getAkaList()); + assertEquals(64, movie.getAkaList().getAkas().size()); } }