From c133ad77bb251d1a47347559691c544a2b1f44eb Mon Sep 17 00:00:00 2001 From: "Vyacheslav N. Boyko" Date: Thu, 17 Jan 2019 00:07:28 +0300 Subject: [PATCH] start working on retrieving akas, refactoring --- .../java/ru/bvn13/imdbspider/imdb/Aka.java | 24 +++++++++++ .../ru/bvn13/imdbspider/imdb/AkaDataType.java | 22 ++++++++++ .../ru/bvn13/imdbspider/imdb/AkaList.java | 25 ++++++++++++ .../imdbspider/imdb/AkaListDataType.java | 21 ++++++++++ .../api/v1_0/AbstractApiProcessor_1_0.java | 10 ++++- .../spider/api/v1_0/AkaListProcessor_1_0.java | 26 ++++++++++++ .../spider/api/v1_0/AkaProcessor_1_0.java | 24 +++++++++++ .../spider/api/v1_0/ApiFactory_1_0.java | 40 +++++++++++++++---- .../api/v1_0/MovieListProcessor_1_0.java | 12 +++--- .../spider/api/v1_0/MovieProcessor_1_0.java | 10 +++-- .../api/v1_0/TaglineListProcessor_1_0.java | 12 +++--- .../spider/api/v1_0/TaglineProcessor_1_0.java | 9 +++-- 12 files changed, 208 insertions(+), 27 deletions(-) create mode 100644 core/src/main/java/ru/bvn13/imdbspider/imdb/Aka.java create mode 100644 core/src/main/java/ru/bvn13/imdbspider/imdb/AkaDataType.java create mode 100644 core/src/main/java/ru/bvn13/imdbspider/imdb/AkaList.java create mode 100644 core/src/main/java/ru/bvn13/imdbspider/imdb/AkaListDataType.java create mode 100644 core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AkaListProcessor_1_0.java create mode 100644 core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AkaProcessor_1_0.java diff --git a/core/src/main/java/ru/bvn13/imdbspider/imdb/Aka.java b/core/src/main/java/ru/bvn13/imdbspider/imdb/Aka.java new file mode 100644 index 0000000..e1d1249 --- /dev/null +++ b/core/src/main/java/ru/bvn13/imdbspider/imdb/Aka.java @@ -0,0 +1,24 @@ +package ru.bvn13.imdbspider.imdb; + +import java.util.EnumSet; + +/** + * Created by bvn13 on 16.01.2019. + */ +public class Aka extends ImdbObject { + + private String title; + + @Override + protected void initRetrievedDataTypes() { + this.retrievedDataTypes = EnumSet.noneOf(AkaDataType.class); + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } +} diff --git a/core/src/main/java/ru/bvn13/imdbspider/imdb/AkaDataType.java b/core/src/main/java/ru/bvn13/imdbspider/imdb/AkaDataType.java new file mode 100644 index 0000000..0efe568 --- /dev/null +++ b/core/src/main/java/ru/bvn13/imdbspider/imdb/AkaDataType.java @@ -0,0 +1,22 @@ +package ru.bvn13.imdbspider.imdb; + +/** + * Created by bvn13 on 16.01.2019. + */ +public enum AkaDataType implements DataType { + + ID("id"), + TITLE("title") + ; + + private String value; + + AkaDataType(String value) { + this.value = value; + } + + @Override + public String get() { + return value; + } +} diff --git a/core/src/main/java/ru/bvn13/imdbspider/imdb/AkaList.java b/core/src/main/java/ru/bvn13/imdbspider/imdb/AkaList.java new file mode 100644 index 0000000..b65327e --- /dev/null +++ b/core/src/main/java/ru/bvn13/imdbspider/imdb/AkaList.java @@ -0,0 +1,25 @@ +package ru.bvn13.imdbspider.imdb; + +import java.util.EnumSet; +import java.util.List; + +/** + * Created by bvn13 on 16.01.2019. + */ +public class AkaList extends ImdbObject { + + private List akas; + + @Override + protected void initRetrievedDataTypes() { + this.retrievedDataTypes = EnumSet.noneOf(AkaListDataType.class); + } + + public List getAkas() { + return akas; + } + + public void setAkas(List akas) { + this.akas = akas; + } +} diff --git a/core/src/main/java/ru/bvn13/imdbspider/imdb/AkaListDataType.java b/core/src/main/java/ru/bvn13/imdbspider/imdb/AkaListDataType.java new file mode 100644 index 0000000..223e95f --- /dev/null +++ b/core/src/main/java/ru/bvn13/imdbspider/imdb/AkaListDataType.java @@ -0,0 +1,21 @@ +package ru.bvn13.imdbspider.imdb; + +/** + * Created by bvn13 on 16.01.2019. + */ +public enum AkaListDataType implements DataType { + + ELEMENTS("elements") + ; + + private String value; + + AkaListDataType(String value) { + this.value = value; + } + + @Override + public String get() { + return value; + } +} diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AbstractApiProcessor_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AbstractApiProcessor_1_0.java index 8b66462..56b289a 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AbstractApiProcessor_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AbstractApiProcessor_1_0.java @@ -1,9 +1,13 @@ package ru.bvn13.imdbspider.spider.api.v1_0; +import ru.bvn13.imdbspider.imdb.DataType; +import ru.bvn13.imdbspider.imdb.ImdbObject; +import ru.bvn13.imdbspider.spider.tasker.Task; + /** * @author boyko_vn at 15.01.2019 */ -abstract public class AbstractApiProcessor_1_0 { +abstract public class AbstractApiProcessor_1_0 & DataType> { protected boolean isDebug; @@ -20,4 +24,8 @@ abstract public class AbstractApiProcessor_1_0 { public void setDebug(boolean debug) { isDebug = debug; } + + abstract void fillUpImdbObject(C imdbObject, Task task); + + abstract Task taskByDataType(D dataType); } diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AkaListProcessor_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AkaListProcessor_1_0.java new file mode 100644 index 0000000..01141b4 --- /dev/null +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AkaListProcessor_1_0.java @@ -0,0 +1,26 @@ +package ru.bvn13.imdbspider.spider.api.v1_0; + +import ru.bvn13.imdbspider.imdb.AkaList; +import ru.bvn13.imdbspider.imdb.AkaListDataType; +import ru.bvn13.imdbspider.spider.tasker.Task; + +/** + * Created by bvn13 on 16.01.2019. + */ +public class AkaListProcessor_1_0 extends AbstractApiProcessor_1_0 { + + public AkaListProcessor_1_0(ApiFactory_1_0 apiFactory) { + super(apiFactory); + } + + @Override + Task taskByDataType(AkaListDataType dataType) { + return null; + } + + @Override + public void fillUpImdbObject(AkaList imdbObject, Task task) { + + } + +} diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AkaProcessor_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AkaProcessor_1_0.java new file mode 100644 index 0000000..d7550b1 --- /dev/null +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/AkaProcessor_1_0.java @@ -0,0 +1,24 @@ +package ru.bvn13.imdbspider.spider.api.v1_0; + +import ru.bvn13.imdbspider.imdb.Aka; +import ru.bvn13.imdbspider.imdb.AkaDataType; +import ru.bvn13.imdbspider.spider.tasker.Task; + +/** + * Created by bvn13 on 16.01.2019. + */ +public class AkaProcessor_1_0 extends AbstractApiProcessor_1_0 { + public AkaProcessor_1_0(ApiFactory_1_0 apiFactory) { + super(apiFactory); + } + + @Override + Task taskByDataType(AkaDataType dataType) { + return null; + } + + @Override + void fillUpImdbObject(Aka imdbObject, Task task) { + + } +} diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java index 4f60c1c..bf4af1b 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java @@ -120,6 +120,8 @@ public class ApiFactory_1_0 implements ApiFactory { private MovieProcessor_1_0 movieProcessor; private TaglineListProcessor_1_0 taglineListProcessor; private TaglineProcessor_1_0 taglineProcessor; + private AkaListProcessor_1_0 akaListProcessor; + private AkaProcessor_1_0 akaProcessor; public ApiFactory_1_0(HtmlProcessor htmlProcessor) { this.htmlProcessor = htmlProcessor; @@ -128,6 +130,8 @@ public class ApiFactory_1_0 implements ApiFactory { this.movieProcessor = new MovieProcessor_1_0(this); this.taglineListProcessor = new TaglineListProcessor_1_0(this); this.taglineProcessor = new TaglineProcessor_1_0(this); + this.akaListProcessor = new AkaListProcessor_1_0(this); + this.akaProcessor = new AkaProcessor_1_0(this); } @Override @@ -163,13 +167,17 @@ public class ApiFactory_1_0 implements ApiFactory { @Override public Task taskByDataType(DataType dataType) throws DataTypeNotSupportedException { if (dataType instanceof MovieDataType) { - return movieProcessor.taskByMovieDataType((MovieDataType) dataType); + return movieProcessor.taskByDataType((MovieDataType) dataType); } else if (dataType instanceof MovieListDataType) { - return movieListProcessor.taskByMovieListDataType((MovieListDataType) dataType); + return movieListProcessor.taskByDataType((MovieListDataType) dataType); } else if (dataType instanceof TaglineListDataType) { - return taglineListProcessor.taskByTaglineListDataType((TaglineListDataType) dataType); + return taglineListProcessor.taskByDataType((TaglineListDataType) dataType); } else if (dataType instanceof TaglineDataType) { - return taglineProcessor.taskByTaglineDataType((TaglineDataType) dataType); + return taglineProcessor.taskByDataType((TaglineDataType) dataType); + } else if (dataType instanceof AkaListDataType) { + return akaListProcessor.taskByDataType((AkaListDataType) dataType); + } else if (dataType instanceof AkaDataType) { + return akaProcessor.taskByDataType((AkaDataType) dataType); } else { throw new DataTypeNotSupportedException(String.format("DataType %s is not supported by API v1_0!", dataType.getClass().getName())); } @@ -179,19 +187,27 @@ public class ApiFactory_1_0 implements ApiFactory { public void fillUpImdbObject(ImdbObject imdbObject, Task task) { if (imdbObject instanceof Movie) { if (task.getDataType() instanceof MovieDataType) { - movieProcessor.fillUpMovie((Movie) imdbObject, task); + movieProcessor.fillUpImdbObject((Movie) imdbObject, task); } } else if (imdbObject instanceof MovieList) { if (task.getDataType() instanceof MovieListDataType) { - movieListProcessor.fillUpMovieList((MovieList) imdbObject, task); + movieListProcessor.fillUpImdbObject((MovieList) imdbObject, task); } } else if (imdbObject instanceof TaglineList) { if (task.getDataType() instanceof TaglineListDataType) { - taglineListProcessor.fillUpTaglineList((TaglineList) imdbObject, task); + taglineListProcessor.fillUpImdbObject((TaglineList) imdbObject, task); } } else if (imdbObject instanceof Tagline) { if (task.getDataType() instanceof TaglineDataType) { - taglineProcessor.fillUpTagline((Tagline) imdbObject, task); + taglineProcessor.fillUpImdbObject((Tagline) imdbObject, task); + } + } else if (imdbObject instanceof AkaList) { + if (task.getDataType() instanceof AkaListDataType) { + akaListProcessor.fillUpImdbObject((AkaList) imdbObject, task); + } + } else if (imdbObject instanceof Aka) { + if (task.getDataType() instanceof AkaDataType) { + akaProcessor.fillUpImdbObject((Aka) imdbObject, task); } } } @@ -230,4 +246,12 @@ public class ApiFactory_1_0 implements ApiFactory { TaglineListProcessor_1_0 getTaglineListProcessor() { return taglineListProcessor; } + + AkaListProcessor_1_0 getAkaListProcessor() { + return akaListProcessor; + } + + AkaProcessor_1_0 getAkaProcessor() { + return akaProcessor; + } } diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieListProcessor_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieListProcessor_1_0.java index bd2a793..fc63370 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieListProcessor_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieListProcessor_1_0.java @@ -11,13 +11,14 @@ import java.util.List; /** * @author boyko_vn at 15.01.2019 */ -public class MovieListProcessor_1_0 extends AbstractApiProcessor_1_0 { +public class MovieListProcessor_1_0 extends AbstractApiProcessor_1_0 { public MovieListProcessor_1_0(ApiFactory_1_0 apiFactory) { super(apiFactory); } - Task taskByMovieListDataType(MovieListDataType movieListDataType) { + @Override + Task taskByDataType(MovieListDataType movieListDataType) { Task t = new Task(); t.setDataType(movieListDataType); switch (movieListDataType) { @@ -40,12 +41,12 @@ public class MovieListProcessor_1_0 extends AbstractApiProcessor_1_0 { if (!getApiFactory().getMovieDataTypeSet().contains(MovieDataType.ID)) { getApiFactory().getMovieDataTypeSet().add(MovieDataType.ID); } - Task movieTask = getApiFactory().getMovieProcessor().taskByMovieDataType(MovieDataType.ID) + Task movieTask = getApiFactory().getMovieProcessor().taskByDataType(MovieDataType.ID) .setParentTask(task) .setUrl(String.format("%s%s", ApiFactory_1_0.URL_MAIN, link.attr("href"))); task.getNestedTasks().add(movieTask); getApiFactory().getMovieDataTypeSet().forEach(movieDataType -> - movieTask.getNestedTasks().add(getApiFactory().getMovieProcessor().taskByMovieDataType(movieDataType) + movieTask.getNestedTasks().add(getApiFactory().getMovieProcessor().taskByDataType(movieDataType) .setParentTask(movieTask) .setUrl(String.format("%s%s", ApiFactory_1_0.URL_MAIN, link.attr("href"))))); } @@ -55,7 +56,8 @@ public class MovieListProcessor_1_0 extends AbstractApiProcessor_1_0 { return t; } - void fillUpMovieList(MovieList movieList, Task task) { + @Override + void fillUpImdbObject(MovieList movieList, Task task) { switch ((MovieListDataType) task.getDataType()) { case ELEMENTS: movieList.setUrl(task.getUrl()); diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieProcessor_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieProcessor_1_0.java index fbea68f..6e197d9 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieProcessor_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieProcessor_1_0.java @@ -17,13 +17,14 @@ import java.util.regex.Matcher; /** * @author boyko_vn at 15.01.2019 */ -public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 { +public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 { public MovieProcessor_1_0(ApiFactory_1_0 apiFactory) { super(apiFactory); } - public Task taskByMovieDataType(MovieDataType movieDataType) { + @Override + Task taskByDataType(MovieDataType movieDataType) { Task t = new Task(); t.setDataType(movieDataType); switch (movieDataType) { @@ -189,7 +190,7 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 { if (task.getCssSelectorResult().size() > 0) { Elements links = task.getCssSelectorResult().first().parent().select("span > a:contains(See more)"); if (links.size() > 0) { - Task newTask = getApiFactory().getTaglineListProcessor().taskByTaglineListDataType(TaglineListDataType.ELEMENTS) + Task newTask = getApiFactory().getTaglineListProcessor().taskByDataType(TaglineListDataType.ELEMENTS) .setParentTask(task) .setUrl(String.format("%s%s", ApiFactory_1_0.URL_MAIN, links.first().attr("href"))); task.getNestedTasks().add(newTask); @@ -202,7 +203,8 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 { } - void fillUpMovie(Movie movie, Task task) { + @Override + void fillUpImdbObject(Movie movie, Task task) { boolean isDone = false; switch ((MovieDataType) task.getDataType()) { case ID: diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/TaglineListProcessor_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/TaglineListProcessor_1_0.java index 048046e..8e14dcc 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/TaglineListProcessor_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/TaglineListProcessor_1_0.java @@ -11,13 +11,14 @@ import java.util.concurrent.atomic.AtomicInteger; /** * @author boyko_vn at 15.01.2019 */ -public class TaglineListProcessor_1_0 extends AbstractApiProcessor_1_0 { +public class TaglineListProcessor_1_0 extends AbstractApiProcessor_1_0 { public TaglineListProcessor_1_0(ApiFactory_1_0 apiFactory) { super(apiFactory); } - Task taskByTaglineListDataType(TaglineListDataType taglineListDataType) { + @Override + Task taskByDataType(TaglineListDataType taglineListDataType) { Task t = new Task(); t.setDataType(taglineListDataType); switch (taglineListDataType) { @@ -26,13 +27,13 @@ public class TaglineListProcessor_1_0 extends AbstractApiProcessor_1_0 { AtomicInteger i = new AtomicInteger(0); t.setPostprocess((task, s) -> { for (Element element : task.getCssSelectorResult()) { - Task newTaskId = getApiFactory().getTaglineProcessor().taskByTaglineDataType(TaglineDataType.ID) + Task newTaskId = getApiFactory().getTaglineProcessor().taskByDataType(TaglineDataType.ID) .setParentTask(task) .setUrl(task.getUrl()) .setResult(String.format("%d", i.getAndAdd(1))); task.getNestedTasks().add(newTaskId); - Task newTaskText = getApiFactory().getTaglineProcessor().taskByTaglineDataType(TaglineDataType.TEXT) + Task newTaskText = getApiFactory().getTaglineProcessor().taskByDataType(TaglineDataType.TEXT) .setParentTask(task) .setUrl(task.getUrl()) .setResult(element.text()); @@ -44,7 +45,8 @@ public class TaglineListProcessor_1_0 extends AbstractApiProcessor_1_0 { return t; } - void fillUpTaglineList(TaglineList taglineList, Task task) { + @Override + void fillUpImdbObject(TaglineList taglineList, Task task) { switch ((TaglineListDataType) task.getDataType()) { case ELEMENTS: taglineList.setUrl(task.getUrl()); diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/TaglineProcessor_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/TaglineProcessor_1_0.java index a971d69..f9f4730 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/TaglineProcessor_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/TaglineProcessor_1_0.java @@ -7,13 +7,14 @@ import ru.bvn13.imdbspider.spider.tasker.Task; /** * @author boyko_vn at 15.01.2019 */ -public class TaglineProcessor_1_0 extends AbstractApiProcessor_1_0 { +public class TaglineProcessor_1_0 extends AbstractApiProcessor_1_0 { public TaglineProcessor_1_0(ApiFactory_1_0 apiFactory) { super(apiFactory); } - Task taskByTaglineDataType(TaglineDataType taglineDataType) { + @Override + Task taskByDataType(TaglineDataType taglineDataType) { Task t = new Task(); t.setDataType(taglineDataType); switch (taglineDataType) { @@ -29,8 +30,8 @@ public class TaglineProcessor_1_0 extends AbstractApiProcessor_1_0 { return t; } - - void fillUpTagline(Tagline tagline, Task task) { + @Override + void fillUpImdbObject(Tagline tagline, Task task) { switch ((TaglineDataType) task.getDataType()) { case ID: tagline.setUrl(task.getUrl());