mirror of https://github.com/bvn13/imdb-spider.git
implemented retrieving akas
parent
c133ad77bb
commit
347e3b1e96
2
LICENSE
2
LICENSE
|
@ -182,7 +182,7 @@
|
||||||
replaced with your own identifying information. (Don't include
|
replaced with your own identifying information. (Don't include
|
||||||
the brackets!) The text should be enclosed in the appropriate
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
comment syntax for the file format. We also recommend that a
|
comment syntax for the file format. We also recommend that a
|
||||||
file or class name and description of purpose be included on the
|
file or class name and description of purpose be included on the
|
||||||
same "printed page" as the copyright notice for easier
|
same "printed page" as the copyright notice for easier
|
||||||
identification within third-party archives.
|
identification within third-party archives.
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,7 @@ import java.util.EnumSet;
|
||||||
*/
|
*/
|
||||||
public class Aka extends ImdbObject<AkaDataType> {
|
public class Aka extends ImdbObject<AkaDataType> {
|
||||||
|
|
||||||
|
private String name;
|
||||||
private String title;
|
private String title;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -21,4 +22,12 @@ public class Aka extends ImdbObject<AkaDataType> {
|
||||||
public void setTitle(String title) {
|
public void setTitle(String title) {
|
||||||
this.title = title;
|
this.title = title;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setName(String name) {
|
||||||
|
this.name = name;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,7 @@ package ru.bvn13.imdbspider.imdb;
|
||||||
public enum AkaDataType implements DataType {
|
public enum AkaDataType implements DataType {
|
||||||
|
|
||||||
ID("id"),
|
ID("id"),
|
||||||
|
NAME("name"),
|
||||||
TITLE("title")
|
TITLE("title")
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package ru.bvn13.imdbspider.imdb;
|
package ru.bvn13.imdbspider.imdb;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
@ -16,6 +17,9 @@ public class AkaList extends ImdbObject<AkaListDataType> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Aka> getAkas() {
|
public List<Aka> getAkas() {
|
||||||
|
if (akas == null) {
|
||||||
|
akas = new ArrayList<>();
|
||||||
|
}
|
||||||
return akas;
|
return akas;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,8 +5,6 @@ import ru.bvn13.imdbspider.imdb.accessories.SoundMix;
|
||||||
|
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author boyko_vn at 09.01.2019
|
* @author boyko_vn at 09.01.2019
|
||||||
|
@ -33,7 +31,7 @@ public class Movie extends ImdbObject<MovieDataType> {
|
||||||
private String aspectRatio;
|
private String aspectRatio;
|
||||||
|
|
||||||
private TaglineList taglineList;
|
private TaglineList taglineList;
|
||||||
private Map<String, String> akas = new ConcurrentHashMap<>(50);
|
private AkaList akaList;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void initRetrievedDataTypes() {
|
protected void initRetrievedDataTypes() {
|
||||||
|
@ -192,12 +190,11 @@ public class Movie extends ImdbObject<MovieDataType> {
|
||||||
this.taglineList = taglineList;
|
this.taglineList = taglineList;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, String> getAkas() {
|
public AkaList getAkaList() {
|
||||||
return akas;
|
return akaList;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setAkas(Map<String, String> akas) {
|
public void setAkaList(AkaList akaList) {
|
||||||
this.akas = akas;
|
this.akaList = akaList;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,5 +27,17 @@ abstract public class AbstractApiProcessor_1_0<C extends ImdbObject, D extends E
|
||||||
|
|
||||||
abstract void fillUpImdbObject(C imdbObject, Task task);
|
abstract void fillUpImdbObject(C imdbObject, Task task);
|
||||||
|
|
||||||
abstract Task taskByDataType(D dataType);
|
Task taskByDataType(D dataType) {
|
||||||
|
return this.taskByDataType(dataType, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
abstract Task taskByDataType(D dataType, String imdbObjectParentId);
|
||||||
|
|
||||||
|
void initializeNestedImdbObjectParentId(Task task, String parentId) {
|
||||||
|
if (task != null) {
|
||||||
|
task.getNestedTasks().forEach(nestedTask -> {
|
||||||
|
nestedTask.setImdbObjectParentId(parentId);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
package ru.bvn13.imdbspider.spider.api.v1_0;
|
package ru.bvn13.imdbspider.spider.api.v1_0;
|
||||||
|
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
import ru.bvn13.imdbspider.imdb.AkaDataType;
|
||||||
import ru.bvn13.imdbspider.imdb.AkaList;
|
import ru.bvn13.imdbspider.imdb.AkaList;
|
||||||
import ru.bvn13.imdbspider.imdb.AkaListDataType;
|
import ru.bvn13.imdbspider.imdb.AkaListDataType;
|
||||||
import ru.bvn13.imdbspider.spider.tasker.Task;
|
import ru.bvn13.imdbspider.spider.tasker.Task;
|
||||||
|
@ -14,13 +16,48 @@ public class AkaListProcessor_1_0 extends AbstractApiProcessor_1_0<AkaList, AkaL
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
Task taskByDataType(AkaListDataType dataType) {
|
Task taskByDataType(AkaListDataType dataType, String imdbObjectParentId) {
|
||||||
return null;
|
Task t = new Task();
|
||||||
|
t.setImdbObjectParentId(imdbObjectParentId);
|
||||||
|
t.setDataType(dataType);
|
||||||
|
switch (dataType) {
|
||||||
|
case ELEMENTS:
|
||||||
|
t.setCssSelector("#releaseinfo_content > table.ipl-zebra-list.akas-table-test-only > tbody > tr");
|
||||||
|
t.setPostprocess((task, s) -> {
|
||||||
|
int i = 0;
|
||||||
|
for (Element element : task.getCssSelectorResult()) {
|
||||||
|
Task akaTask = getApiFactory().getAkaProcessor().taskByDataType(AkaDataType.ID, task.getImdbObjectParentId())
|
||||||
|
.setUrl(task.getUrl())
|
||||||
|
.setResultType(String.class)
|
||||||
|
.setResult(String.format("%d", (i++)))
|
||||||
|
.setParentTask(task);
|
||||||
|
task.getNestedTasks().add(akaTask);
|
||||||
|
|
||||||
|
for (AkaDataType value : AkaDataType.values()) {
|
||||||
|
if (!value.equals(AkaDataType.ID)) {
|
||||||
|
Task newTask = getApiFactory().getAkaProcessor().taskByDataType(value, task.getImdbObjectParentId())
|
||||||
|
.setSourceType(Task.SOURCE_TYPE.HTML)
|
||||||
|
.setUrl(task.getUrl())
|
||||||
|
.setSourceHtml(element.html())
|
||||||
|
.setParentTask(akaTask);
|
||||||
|
akaTask.getNestedTasks().add(newTask);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void fillUpImdbObject(AkaList imdbObject, Task task) {
|
public void fillUpImdbObject(AkaList akaList, Task task) {
|
||||||
|
switch ((AkaListDataType) task.getDataType()) {
|
||||||
|
case ELEMENTS:
|
||||||
|
akaList.setUrl(task.getUrl());
|
||||||
|
akaList.getRetrievedDataTypes().add((AkaListDataType) task.getDataType());
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,8 @@
|
||||||
package ru.bvn13.imdbspider.spider.api.v1_0;
|
package ru.bvn13.imdbspider.spider.api.v1_0;
|
||||||
|
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
import ru.bvn13.imdbspider.exceptions.processor.HtmlProcessorException;
|
||||||
import ru.bvn13.imdbspider.imdb.Aka;
|
import ru.bvn13.imdbspider.imdb.Aka;
|
||||||
import ru.bvn13.imdbspider.imdb.AkaDataType;
|
import ru.bvn13.imdbspider.imdb.AkaDataType;
|
||||||
import ru.bvn13.imdbspider.spider.tasker.Task;
|
import ru.bvn13.imdbspider.spider.tasker.Task;
|
||||||
|
@ -13,12 +16,69 @@ public class AkaProcessor_1_0 extends AbstractApiProcessor_1_0<Aka, AkaDataType>
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
Task taskByDataType(AkaDataType dataType) {
|
Task taskByDataType(AkaDataType dataType, String imdbObjectParentId) {
|
||||||
return null;
|
Task t = new Task();
|
||||||
|
t.setImdbObjectParentId(imdbObjectParentId);
|
||||||
|
t.setDataType(dataType);
|
||||||
|
switch (dataType) {
|
||||||
|
case ID:
|
||||||
|
//
|
||||||
|
break;
|
||||||
|
case NAME:
|
||||||
|
t.setPostprocess((task, s) -> {
|
||||||
|
task.setResultType(String.class);
|
||||||
|
task.setResult("");
|
||||||
|
try {
|
||||||
|
Elements els = getApiFactory().getHtmlProcessor().process(String.format("<table><row>%s</row></table>", task.getSourceHtml()), "td.aka-item__name");
|
||||||
|
if (els.size() > 0) {
|
||||||
|
Element name = els.first();
|
||||||
|
task.setResult(name.text());
|
||||||
|
}
|
||||||
|
} catch (HtmlProcessorException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
case TITLE:
|
||||||
|
t.setPostprocess((task, s) -> {
|
||||||
|
task.setResultType(String.class);
|
||||||
|
task.setResult("");
|
||||||
|
try {
|
||||||
|
Elements els = getApiFactory().getHtmlProcessor().process(String.format("<table><row>%s</row></table>", task.getSourceHtml()), "td.aka-item__title");
|
||||||
|
if (els.size() > 0) {
|
||||||
|
Element title = els.first();
|
||||||
|
task.setResult(title.text());
|
||||||
|
}
|
||||||
|
} catch (HtmlProcessorException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
void fillUpImdbObject(Aka imdbObject, Task task) {
|
void fillUpImdbObject(Aka aka, Task task) {
|
||||||
|
boolean isDone = false;
|
||||||
|
switch ((AkaDataType) task.getDataType()) {
|
||||||
|
case ID:
|
||||||
|
aka.setId((String) task.getResult());
|
||||||
|
aka.setUrl(task.getUrl());
|
||||||
|
isDone = true;
|
||||||
|
break;
|
||||||
|
case NAME:
|
||||||
|
aka.setName((String) task.getResult());
|
||||||
|
isDone = true;
|
||||||
|
break;
|
||||||
|
case TITLE:
|
||||||
|
aka.setTitle((String) task.getResult());
|
||||||
|
isDone = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isDone) {
|
||||||
|
aka.getRetrievedDataTypes().add((AkaDataType) task.getDataType());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,6 +30,7 @@ public class ApiFactory_1_0 implements ApiFactory {
|
||||||
static final String URL_MAIN = "https://www.imdb.com";
|
static final String URL_MAIN = "https://www.imdb.com";
|
||||||
|
|
||||||
private static final String URL_SEARCH_TITLE = "https://www.imdb.com/find?ref_=nv_sr_fn&q={{title}}&s=tt";
|
private static final String URL_SEARCH_TITLE = "https://www.imdb.com/find?ref_=nv_sr_fn&q={{title}}&s=tt";
|
||||||
|
static final String URL_AKAS = "https://www.imdb.com/title/tt{{movie_id}}/releaseinfo";
|
||||||
|
|
||||||
static final Pattern PATTERN_MOVIE_ID_FROM_MOVIELIST = Pattern.compile("/title/tt(\\d+)/.*");
|
static final Pattern PATTERN_MOVIE_ID_FROM_MOVIELIST = Pattern.compile("/title/tt(\\d+)/.*");
|
||||||
|
|
||||||
|
|
|
@ -18,8 +18,9 @@ public class MovieListProcessor_1_0 extends AbstractApiProcessor_1_0<MovieList,
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
Task taskByDataType(MovieListDataType movieListDataType) {
|
Task taskByDataType(MovieListDataType movieListDataType, String imdbObjectParentId) {
|
||||||
Task t = new Task();
|
Task t = new Task();
|
||||||
|
t.setImdbObjectParentId(imdbObjectParentId);
|
||||||
t.setDataType(movieListDataType);
|
t.setDataType(movieListDataType);
|
||||||
switch (movieListDataType) {
|
switch (movieListDataType) {
|
||||||
case ELEMENTS:
|
case ELEMENTS:
|
||||||
|
@ -41,12 +42,12 @@ public class MovieListProcessor_1_0 extends AbstractApiProcessor_1_0<MovieList,
|
||||||
if (!getApiFactory().getMovieDataTypeSet().contains(MovieDataType.ID)) {
|
if (!getApiFactory().getMovieDataTypeSet().contains(MovieDataType.ID)) {
|
||||||
getApiFactory().getMovieDataTypeSet().add(MovieDataType.ID);
|
getApiFactory().getMovieDataTypeSet().add(MovieDataType.ID);
|
||||||
}
|
}
|
||||||
Task movieTask = getApiFactory().getMovieProcessor().taskByDataType(MovieDataType.ID)
|
Task movieTask = getApiFactory().getMovieProcessor().taskByDataType(MovieDataType.ID, task.getImdbObjectParentId())
|
||||||
.setParentTask(task)
|
.setParentTask(task)
|
||||||
.setUrl(String.format("%s%s", ApiFactory_1_0.URL_MAIN, link.attr("href")));
|
.setUrl(String.format("%s%s", ApiFactory_1_0.URL_MAIN, link.attr("href")));
|
||||||
task.getNestedTasks().add(movieTask);
|
task.getNestedTasks().add(movieTask);
|
||||||
getApiFactory().getMovieDataTypeSet().forEach(movieDataType ->
|
getApiFactory().getMovieDataTypeSet().forEach(movieDataType ->
|
||||||
movieTask.getNestedTasks().add(getApiFactory().getMovieProcessor().taskByDataType(movieDataType)
|
movieTask.getNestedTasks().add(getApiFactory().getMovieProcessor().taskByDataType(movieDataType, task.getImdbObjectParentId())
|
||||||
.setParentTask(movieTask)
|
.setParentTask(movieTask)
|
||||||
.setUrl(String.format("%s%s", ApiFactory_1_0.URL_MAIN, link.attr("href")))));
|
.setUrl(String.format("%s%s", ApiFactory_1_0.URL_MAIN, link.attr("href")))));
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,9 +3,7 @@ package ru.bvn13.imdbspider.spider.api.v1_0;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
import ru.bvn13.imdbspider.exceptions.processor.HtmlProcessorException;
|
import ru.bvn13.imdbspider.exceptions.processor.HtmlProcessorException;
|
||||||
import ru.bvn13.imdbspider.imdb.Movie;
|
import ru.bvn13.imdbspider.imdb.*;
|
||||||
import ru.bvn13.imdbspider.imdb.MovieDataType;
|
|
||||||
import ru.bvn13.imdbspider.imdb.TaglineListDataType;
|
|
||||||
import ru.bvn13.imdbspider.imdb.accessories.Link;
|
import ru.bvn13.imdbspider.imdb.accessories.Link;
|
||||||
import ru.bvn13.imdbspider.imdb.accessories.SoundMix;
|
import ru.bvn13.imdbspider.imdb.accessories.SoundMix;
|
||||||
import ru.bvn13.imdbspider.spider.tasker.Task;
|
import ru.bvn13.imdbspider.spider.tasker.Task;
|
||||||
|
@ -24,8 +22,9 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0<Movie, MovieDat
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
Task taskByDataType(MovieDataType movieDataType) {
|
Task taskByDataType(MovieDataType movieDataType, String imdbObjectParentId) {
|
||||||
Task t = new Task();
|
Task t = new Task();
|
||||||
|
t.setImdbObjectParentId(imdbObjectParentId);
|
||||||
t.setDataType(movieDataType);
|
t.setDataType(movieDataType);
|
||||||
switch (movieDataType) {
|
switch (movieDataType) {
|
||||||
case ID:
|
case ID:
|
||||||
|
@ -34,6 +33,7 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0<Movie, MovieDat
|
||||||
if (matcher.find()) {
|
if (matcher.find()) {
|
||||||
task.setResultType(String.class);
|
task.setResultType(String.class);
|
||||||
task.setResult(matcher.group(1));
|
task.setResult(matcher.group(1));
|
||||||
|
initializeNestedImdbObjectParentId(task, (String) task.getResult());
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
@ -47,7 +47,7 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0<Movie, MovieDat
|
||||||
task.setResultType(String.class);
|
task.setResultType(String.class);
|
||||||
task.setResult("");
|
task.setResult("");
|
||||||
if (task.getCssSelectorResult().size() > 0) {
|
if (task.getCssSelectorResult().size() > 0) {
|
||||||
task.setResult(task.getCssSelectorResult().first().text());
|
task.setResult(task.getCssSelectorResult().first().ownText());
|
||||||
} else {
|
} else {
|
||||||
try {
|
try {
|
||||||
Elements titles = getApiFactory().getHtmlProcessor().process(s, "#title-overview-widget > div.vital > div.title_block > div > div.titleBar > div.title_wrapper > h1"); // like title
|
Elements titles = getApiFactory().getHtmlProcessor().process(s, "#title-overview-widget > div.vital > div.title_block > div > div.titleBar > div.title_wrapper > h1"); // like title
|
||||||
|
@ -190,7 +190,7 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0<Movie, MovieDat
|
||||||
if (task.getCssSelectorResult().size() > 0) {
|
if (task.getCssSelectorResult().size() > 0) {
|
||||||
Elements links = task.getCssSelectorResult().first().parent().select("span > a:contains(See more)");
|
Elements links = task.getCssSelectorResult().first().parent().select("span > a:contains(See more)");
|
||||||
if (links.size() > 0) {
|
if (links.size() > 0) {
|
||||||
Task newTask = getApiFactory().getTaglineListProcessor().taskByDataType(TaglineListDataType.ELEMENTS)
|
Task newTask = getApiFactory().getTaglineListProcessor().taskByDataType(TaglineListDataType.ELEMENTS, task.getImdbObjectParentId())
|
||||||
.setParentTask(task)
|
.setParentTask(task)
|
||||||
.setUrl(String.format("%s%s", ApiFactory_1_0.URL_MAIN, links.first().attr("href")));
|
.setUrl(String.format("%s%s", ApiFactory_1_0.URL_MAIN, links.first().attr("href")));
|
||||||
task.getNestedTasks().add(newTask);
|
task.getNestedTasks().add(newTask);
|
||||||
|
@ -198,6 +198,14 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0<Movie, MovieDat
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
case AKAS:
|
||||||
|
t.setPostprocess((task, s) -> {
|
||||||
|
Task akasTask = getApiFactory().getAkaListProcessor().taskByDataType(AkaListDataType.ELEMENTS, task.getImdbObjectParentId())
|
||||||
|
.setParentTask(task)
|
||||||
|
.setUrl(ApiFactory_1_0.URL_AKAS.replace("{{movie_id}}", t.getImdbObjectParentId()));
|
||||||
|
task.getNestedTasks().add(akasTask);
|
||||||
|
});
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
@ -289,6 +297,12 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0<Movie, MovieDat
|
||||||
break;
|
break;
|
||||||
case TAGLINES:
|
case TAGLINES:
|
||||||
isDone = true;
|
isDone = true;
|
||||||
|
movie.setTaglineList((TaglineList) task.getResult());
|
||||||
|
break;
|
||||||
|
case AKAS:
|
||||||
|
movie.setAkaList((AkaList) task.getResult());
|
||||||
|
isDone = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isDone) {
|
if (isDone) {
|
||||||
|
|
|
@ -18,8 +18,9 @@ public class TaglineListProcessor_1_0 extends AbstractApiProcessor_1_0<TaglineLi
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
Task taskByDataType(TaglineListDataType taglineListDataType) {
|
Task taskByDataType(TaglineListDataType taglineListDataType, String imdbObjectParentId) {
|
||||||
Task t = new Task();
|
Task t = new Task();
|
||||||
|
t.setImdbObjectParentId(imdbObjectParentId);
|
||||||
t.setDataType(taglineListDataType);
|
t.setDataType(taglineListDataType);
|
||||||
switch (taglineListDataType) {
|
switch (taglineListDataType) {
|
||||||
case ELEMENTS:
|
case ELEMENTS:
|
||||||
|
@ -27,13 +28,13 @@ public class TaglineListProcessor_1_0 extends AbstractApiProcessor_1_0<TaglineLi
|
||||||
AtomicInteger i = new AtomicInteger(0);
|
AtomicInteger i = new AtomicInteger(0);
|
||||||
t.setPostprocess((task, s) -> {
|
t.setPostprocess((task, s) -> {
|
||||||
for (Element element : task.getCssSelectorResult()) {
|
for (Element element : task.getCssSelectorResult()) {
|
||||||
Task newTaskId = getApiFactory().getTaglineProcessor().taskByDataType(TaglineDataType.ID)
|
Task newTaskId = getApiFactory().getTaglineProcessor().taskByDataType(TaglineDataType.ID, task.getImdbObjectParentId())
|
||||||
.setParentTask(task)
|
.setParentTask(task)
|
||||||
.setUrl(task.getUrl())
|
.setUrl(task.getUrl())
|
||||||
.setResult(String.format("%d", i.getAndAdd(1)));
|
.setResult(String.format("%d", i.getAndAdd(1)));
|
||||||
task.getNestedTasks().add(newTaskId);
|
task.getNestedTasks().add(newTaskId);
|
||||||
|
|
||||||
Task newTaskText = getApiFactory().getTaglineProcessor().taskByDataType(TaglineDataType.TEXT)
|
Task newTaskText = getApiFactory().getTaglineProcessor().taskByDataType(TaglineDataType.TEXT, task.getImdbObjectParentId())
|
||||||
.setParentTask(task)
|
.setParentTask(task)
|
||||||
.setUrl(task.getUrl())
|
.setUrl(task.getUrl())
|
||||||
.setResult(element.text());
|
.setResult(element.text());
|
||||||
|
|
|
@ -14,8 +14,9 @@ public class TaglineProcessor_1_0 extends AbstractApiProcessor_1_0<Tagline, Tagl
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
Task taskByDataType(TaglineDataType taglineDataType) {
|
Task taskByDataType(TaglineDataType taglineDataType, String imdbObjectParentId) {
|
||||||
Task t = new Task();
|
Task t = new Task();
|
||||||
|
t.setImdbObjectParentId(imdbObjectParentId);
|
||||||
t.setDataType(taglineDataType);
|
t.setDataType(taglineDataType);
|
||||||
switch (taglineDataType) {
|
switch (taglineDataType) {
|
||||||
case ID:
|
case ID:
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
package ru.bvn13.imdbspider.spider.composer;
|
||||||
|
|
||||||
|
import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
|
||||||
|
import ru.bvn13.imdbspider.exceptions.composer.ComposerNotFoundException;
|
||||||
|
import ru.bvn13.imdbspider.imdb.Aka;
|
||||||
|
import ru.bvn13.imdbspider.spider.api.ApiFactory;
|
||||||
|
import ru.bvn13.imdbspider.spider.tasker.Task;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author boyko_vn at 17.01.2019
|
||||||
|
*/
|
||||||
|
public class AkaComposer extends AbstractImdbObjectComposer implements ImdbObjectComposer<Aka> {
|
||||||
|
public AkaComposer(ApiFactory apiFactory, ImdbObjectComposerFactory imdbObjectComposerFactory) throws ComposerNotFoundException {
|
||||||
|
super(apiFactory, imdbObjectComposerFactory);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Aka compose(Task task) throws ImdbSpiderException {
|
||||||
|
Aka aka = new Aka();
|
||||||
|
this.apiFactory.fillUpImdbObject(aka, task);
|
||||||
|
for (Task nestedTask : task.getNestedTasks()) {
|
||||||
|
this.apiFactory.fillUpImdbObject(aka, nestedTask);
|
||||||
|
}
|
||||||
|
return aka;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,36 @@
|
||||||
|
package ru.bvn13.imdbspider.spider.composer;
|
||||||
|
|
||||||
|
import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
|
||||||
|
import ru.bvn13.imdbspider.exceptions.composer.ComposerNotFoundException;
|
||||||
|
import ru.bvn13.imdbspider.imdb.Aka;
|
||||||
|
import ru.bvn13.imdbspider.imdb.AkaList;
|
||||||
|
import ru.bvn13.imdbspider.spider.api.ApiFactory;
|
||||||
|
import ru.bvn13.imdbspider.spider.tasker.Task;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author boyko_vn at 17.01.2019
|
||||||
|
*/
|
||||||
|
public class AkaListComposer extends AbstractImdbObjectComposer implements ImdbObjectComposer<AkaList> {
|
||||||
|
|
||||||
|
private AkaComposer akaComposer;
|
||||||
|
|
||||||
|
public AkaListComposer(ApiFactory apiFactory, ImdbObjectComposerFactory imdbObjectComposerFactory) throws ComposerNotFoundException {
|
||||||
|
super(apiFactory, imdbObjectComposerFactory);
|
||||||
|
|
||||||
|
akaComposer = (AkaComposer) this.imdbObjectComposerFactory.getComposer(Aka.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AkaList compose(Task task) throws ImdbSpiderException {
|
||||||
|
AkaList akaList = new AkaList();
|
||||||
|
if (task.getNestedTasks().size() > 0) {
|
||||||
|
Task akaListTask = task.getNestedTasks().get(0);
|
||||||
|
|
||||||
|
this.apiFactory.fillUpImdbObject(akaList, task);
|
||||||
|
for (Task nestedTask : akaListTask.getNestedTasks()) {
|
||||||
|
akaList.getAkas().add(akaComposer.compose(nestedTask));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return akaList;
|
||||||
|
}
|
||||||
|
}
|
|
@ -19,6 +19,9 @@ public class ImdbObjectComposerFactory {
|
||||||
private MovieComposer movieComposer;
|
private MovieComposer movieComposer;
|
||||||
private TaglineListComposer taglineListComposer;
|
private TaglineListComposer taglineListComposer;
|
||||||
private TaglineComposer taglineComposer;
|
private TaglineComposer taglineComposer;
|
||||||
|
private AkaListComposer akaListComposer;
|
||||||
|
private AkaComposer akaComposer;
|
||||||
|
|
||||||
|
|
||||||
public <C extends ImdbObject> ImdbObjectComposer getComposer(Class<C> clazz) throws ComposerNotFoundException {
|
public <C extends ImdbObject> ImdbObjectComposer getComposer(Class<C> clazz) throws ComposerNotFoundException {
|
||||||
if (clazz.isAssignableFrom(MovieList.class)) {
|
if (clazz.isAssignableFrom(MovieList.class)) {
|
||||||
|
@ -41,6 +44,16 @@ public class ImdbObjectComposerFactory {
|
||||||
taglineComposer = new TaglineComposer(apiFactory, this);
|
taglineComposer = new TaglineComposer(apiFactory, this);
|
||||||
}
|
}
|
||||||
return taglineComposer;
|
return taglineComposer;
|
||||||
|
} if (clazz.isAssignableFrom(AkaList.class)) {
|
||||||
|
if (akaListComposer == null) {
|
||||||
|
akaListComposer = new AkaListComposer(apiFactory, this);
|
||||||
|
}
|
||||||
|
return akaListComposer;
|
||||||
|
} if (clazz.isAssignableFrom(Aka.class)) {
|
||||||
|
if (akaComposer == null) {
|
||||||
|
akaComposer = new AkaComposer(apiFactory, this);
|
||||||
|
}
|
||||||
|
return akaComposer;
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new ComposerNotFoundException(String.format("Composer not found: %s", clazz.getName()));
|
throw new ComposerNotFoundException(String.format("Composer not found: %s", clazz.getName()));
|
||||||
|
|
|
@ -2,6 +2,7 @@ package ru.bvn13.imdbspider.spider.composer;
|
||||||
|
|
||||||
import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
|
import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
|
||||||
import ru.bvn13.imdbspider.exceptions.composer.ComposerNotFoundException;
|
import ru.bvn13.imdbspider.exceptions.composer.ComposerNotFoundException;
|
||||||
|
import ru.bvn13.imdbspider.imdb.AkaList;
|
||||||
import ru.bvn13.imdbspider.imdb.Movie;
|
import ru.bvn13.imdbspider.imdb.Movie;
|
||||||
import ru.bvn13.imdbspider.imdb.MovieDataType;
|
import ru.bvn13.imdbspider.imdb.MovieDataType;
|
||||||
import ru.bvn13.imdbspider.imdb.TaglineList;
|
import ru.bvn13.imdbspider.imdb.TaglineList;
|
||||||
|
@ -14,11 +15,13 @@ import ru.bvn13.imdbspider.spider.tasker.Task;
|
||||||
public class MovieComposer extends AbstractImdbObjectComposer implements ImdbObjectComposer<Movie> {
|
public class MovieComposer extends AbstractImdbObjectComposer implements ImdbObjectComposer<Movie> {
|
||||||
|
|
||||||
private TaglineListComposer taglineListComposer;
|
private TaglineListComposer taglineListComposer;
|
||||||
|
private AkaListComposer akaListComposer;
|
||||||
|
|
||||||
public MovieComposer(ApiFactory apiFactory, ImdbObjectComposerFactory imdbObjectComposerFactory) throws ComposerNotFoundException {
|
public MovieComposer(ApiFactory apiFactory, ImdbObjectComposerFactory imdbObjectComposerFactory) throws ComposerNotFoundException {
|
||||||
super(apiFactory, imdbObjectComposerFactory);
|
super(apiFactory, imdbObjectComposerFactory);
|
||||||
|
|
||||||
taglineListComposer = (TaglineListComposer) this.imdbObjectComposerFactory.getComposer(TaglineList.class);
|
taglineListComposer = (TaglineListComposer) this.imdbObjectComposerFactory.getComposer(TaglineList.class);
|
||||||
|
akaListComposer = (AkaListComposer) this.imdbObjectComposerFactory.getComposer(AkaList.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -30,6 +33,9 @@ public class MovieComposer extends AbstractImdbObjectComposer implements ImdbObj
|
||||||
if (nestedTask.getDataType().equals(MovieDataType.TAGLINES)) {
|
if (nestedTask.getDataType().equals(MovieDataType.TAGLINES)) {
|
||||||
movie.setTaglineList(taglineListComposer.compose(nestedTask));
|
movie.setTaglineList(taglineListComposer.compose(nestedTask));
|
||||||
}
|
}
|
||||||
|
if (nestedTask.getDataType().equals(MovieDataType.AKAS)) {
|
||||||
|
movie.setAkaList(akaListComposer.compose(nestedTask));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return movie;
|
return movie;
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,9 +13,19 @@ import java.util.function.BiConsumer;
|
||||||
*/
|
*/
|
||||||
public class Task {
|
public class Task {
|
||||||
|
|
||||||
|
private String imdbObjectParentId;
|
||||||
private String url;
|
private String url;
|
||||||
private String html;
|
private String html;
|
||||||
|
|
||||||
|
public enum SOURCE_TYPE {
|
||||||
|
URL,
|
||||||
|
HTML
|
||||||
|
}
|
||||||
|
|
||||||
|
private SOURCE_TYPE sourceType = SOURCE_TYPE.URL;
|
||||||
|
|
||||||
|
private String sourceHtml;
|
||||||
|
|
||||||
private DataType dataType;
|
private DataType dataType;
|
||||||
private String cssSelector;
|
private String cssSelector;
|
||||||
private Elements cssSelectorResult;
|
private Elements cssSelectorResult;
|
||||||
|
@ -50,6 +60,32 @@ public class Task {
|
||||||
this.dataType = dataType;
|
this.dataType = dataType;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getImdbObjectParentId() {
|
||||||
|
return imdbObjectParentId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setImdbObjectParentId(String imdbObjectParentId) {
|
||||||
|
this.imdbObjectParentId = imdbObjectParentId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SOURCE_TYPE getSourceType() {
|
||||||
|
return sourceType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Task setSourceType(SOURCE_TYPE sourceType) {
|
||||||
|
this.sourceType = sourceType;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSourceHtml() {
|
||||||
|
return sourceHtml;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Task setSourceHtml(String sourceHtml) {
|
||||||
|
this.sourceHtml = sourceHtml;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
public String getUrl() {
|
public String getUrl() {
|
||||||
return url;
|
return url;
|
||||||
}
|
}
|
||||||
|
@ -63,8 +99,9 @@ public class Task {
|
||||||
return html;
|
return html;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setHtml(String html) {
|
public Task setHtml(String html) {
|
||||||
this.html = html;
|
this.html = html;
|
||||||
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getCssSelector() {
|
public String getCssSelector() {
|
||||||
|
|
|
@ -57,7 +57,8 @@ public class MovieSearchTest {
|
||||||
MovieDataType.COLOR,
|
MovieDataType.COLOR,
|
||||||
MovieDataType.ASPECT_RATIO,
|
MovieDataType.ASPECT_RATIO,
|
||||||
MovieDataType.POSTER,
|
MovieDataType.POSTER,
|
||||||
MovieDataType.TAGLINES
|
MovieDataType.TAGLINES,
|
||||||
|
MovieDataType.AKAS
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
@ -94,28 +95,30 @@ public class MovieSearchTest {
|
||||||
assertTrue("Expected ASPECT_RATIO field presence", movie.isDataTypeRetrieved(MovieDataType.ASPECT_RATIO));
|
assertTrue("Expected ASPECT_RATIO field presence", movie.isDataTypeRetrieved(MovieDataType.ASPECT_RATIO));
|
||||||
assertTrue("Expected POSTER field presence", movie.isDataTypeRetrieved(MovieDataType.POSTER));
|
assertTrue("Expected POSTER field presence", movie.isDataTypeRetrieved(MovieDataType.POSTER));
|
||||||
assertTrue("Expected TAGLINES field presence", movie.isDataTypeRetrieved(MovieDataType.TAGLINES));
|
assertTrue("Expected TAGLINES field presence", movie.isDataTypeRetrieved(MovieDataType.TAGLINES));
|
||||||
|
assertTrue("Expected AKAS field presence", movie.isDataTypeRetrieved(MovieDataType.AKAS));
|
||||||
|
|
||||||
assertEquals("Expected that first in search result has ID = 0088247, but given: "+movie.getId(), "0088247", movie.getId());
|
assertEquals("Expected that first in search result has ID = 0088247, but given: "+movie.getId(), "0088247", movie.getId());
|
||||||
assertEquals("Expected original movie name: The Terminator, but given: "+movie.getOriginalTitle(), "The Terminator", movie.getOriginalTitle());
|
assertEquals("Expected original movie name: The Terminator, but given: "+movie.getOriginalTitle(), "The Terminator", movie.getOriginalTitle());
|
||||||
|
//assertTrue("Expected original movie name starts with: The Terminator, but given: "+movie.getOriginalTitle(), movie.getOriginalTitle().startsWith("The Terminator"));
|
||||||
assertEquals("Expected year = "+movie.getYear(), Integer.valueOf(1984), movie.getYear());
|
assertEquals("Expected year = "+movie.getYear(), Integer.valueOf(1984), movie.getYear());
|
||||||
|
|
||||||
assertEquals("Invalid storyline", TERMINATOR_STORYLINE, movie.getStoryline());
|
assertEquals("Invalid storyline", TERMINATOR_STORYLINE, movie.getStoryline());
|
||||||
//assertEquals(TERMINATOR_TAGLINES, movie.getRandomTagline());
|
//assertEquals(TERMINATOR_TAGLINES, movie.getRandomTagline());
|
||||||
assertTrue("Expected that genre Action is present", movie.getGenres().contains("Action"));
|
assertTrue("Expected that genre Action was present", movie.getGenres().contains("Action"));
|
||||||
assertTrue("Expected that genre Sci-Fi is present", movie.getGenres().contains("Sci-Fi"));
|
assertTrue("Expected that genre Sci-Fi was present", movie.getGenres().contains("Sci-Fi"));
|
||||||
|
|
||||||
//assertEquals("Expected that certificate is 16+ but given: "+movie.getCertificate(), "16+", movie.getCertificate()); //cannot be blocker - it depends on locale, which IMDB determines on geo-location (it seems)
|
//assertEquals("Expected that certificate is 16+ but given: "+movie.getCertificate(), "16+", movie.getCertificate()); //cannot be blocker - it depends on locale, which IMDB determines on geo-location (it seems)
|
||||||
|
|
||||||
//assertTrue(movie.getOfficialSites().contains("Facebook"));
|
//assertTrue(movie.getOfficialSites().contains("Facebook"));
|
||||||
assertTrue("Expected than at least one site is present", movie.getOfficialSites().size() > 0);
|
assertTrue("Expected than at least one site was present", movie.getOfficialSites().size() > 0);
|
||||||
assertEquals("Expected that first site is Facebook", "Facebook", movie.getOfficialSites().get(0).getTitle());
|
assertEquals("Expected that first site is Facebook", "Facebook", movie.getOfficialSites().get(0).getTitle());
|
||||||
//assertEquals(TERMINATOR_FACEBOOK_URL, movie.getOfficialSites().get(0).getUrl()); //not comparable, dynamic link
|
//assertEquals(TERMINATOR_FACEBOOK_URL, movie.getOfficialSites().get(0).getUrl()); //not comparable, dynamic link
|
||||||
|
|
||||||
assertTrue("Expected at least one country is present", movie.getCountries().size() > 0);
|
assertTrue("Expected at least one country was present", movie.getCountries().size() > 0);
|
||||||
assertTrue("Expected country UK", movie.getCountries().contains("UK"));
|
assertTrue("Expected country UK", movie.getCountries().contains("UK"));
|
||||||
assertTrue("Expected country USA", movie.getCountries().contains("USA"));
|
assertTrue("Expected country USA", movie.getCountries().contains("USA"));
|
||||||
|
|
||||||
assertTrue("Expected at least one language is present", movie.getLanguages().size() > 0);
|
assertTrue("Expected at least one language was present", movie.getLanguages().size() > 0);
|
||||||
assertTrue("Expected language English", movie.getLanguages().contains("English"));
|
assertTrue("Expected language English", movie.getLanguages().contains("English"));
|
||||||
assertTrue("Expected language Spanish", movie.getLanguages().contains("Spanish"));
|
assertTrue("Expected language Spanish", movie.getLanguages().contains("Spanish"));
|
||||||
|
|
||||||
|
@ -151,7 +154,7 @@ public class MovieSearchTest {
|
||||||
|
|
||||||
|
|
||||||
// taglines
|
// taglines
|
||||||
assertNotNull("Expected that tagline list presence", movie.getTaglineList());
|
assertNotNull("Expected that tagline list was present", movie.getTaglineList());
|
||||||
assertEquals("Expected 5 taglines but given: "+movie.getTaglineList().getTaglines().size(), 5, movie.getTaglineList().getTaglines().size());
|
assertEquals("Expected 5 taglines but given: "+movie.getTaglineList().getTaglines().size(), 5, movie.getTaglineList().getTaglines().size());
|
||||||
|
|
||||||
boolean hasTagline1 = false, hasTagline2 = false, hasTagline3 = false, hasTagline4 = false, hasTagline5 = false;
|
boolean hasTagline1 = false, hasTagline2 = false, hasTagline3 = false, hasTagline4 = false, hasTagline5 = false;
|
||||||
|
@ -170,5 +173,9 @@ public class MovieSearchTest {
|
||||||
assertTrue("Expected Tagline 3 presence", hasTagline3);
|
assertTrue("Expected Tagline 3 presence", hasTagline3);
|
||||||
assertTrue("Expected Tagline 4 presence", hasTagline4);
|
assertTrue("Expected Tagline 4 presence", hasTagline4);
|
||||||
assertTrue("Expected Tagline 5 presence", hasTagline5);
|
assertTrue("Expected Tagline 5 presence", hasTagline5);
|
||||||
|
|
||||||
|
// akas
|
||||||
|
assertNotNull("Expected that akas list was present", movie.getAkaList());
|
||||||
|
assertEquals(64, movie.getAkaList().getAkas().size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue