implemented almost all simple movie params. test included.

master
Vyacheslav Boyko 2019-01-14 18:18:42 +03:00
parent 9898b9113d
commit 0d0abda561
11 changed files with 609 additions and 46 deletions

View File

@ -6,15 +6,14 @@ import ru.bvn13.imdbspider.imdb.MovieList;
import ru.bvn13.imdbspider.spider.api.v1_0.ApiFactory_1_0;
import ru.bvn13.imdbspider.spider.composer.ImdbObjectComposerFactory;
import ru.bvn13.imdbspider.spider.composer.MovieListComposer;
import ru.bvn13.imdbspider.spider.processor.JsoupHtmlProcessor;
import ru.bvn13.imdbspider.spider.tasker.Manager;
import ru.bvn13.imdbspider.spider.tasker.Task;
import ru.bvn13.imdbspider.spider.api.ApiFactory;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.EnumSet;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.*;
/**
* @author boyko_vn at 09.01.2019
@ -27,7 +26,7 @@ public class ImdbSpider {
private ImdbObjectComposerFactory imdbObjectComposerFactory;
public static ImdbSpider withApi_1_0() {
ApiFactory apiFactory = new ApiFactory_1_0();
ApiFactory apiFactory = new ApiFactory_1_0(new JsoupHtmlProcessor());
return new ImdbSpider(apiFactory, new ImdbObjectComposerFactory(apiFactory));
}
@ -39,6 +38,11 @@ public class ImdbSpider {
manager = new Manager();
}
/**
 * Registers an HTTP request header on the underlying task manager.
 *
 * @param key   header name, forwarded unchanged to the manager
 * @param value header value, forwarded unchanged to the manager
 * @return this spider, so that calls can be chained
 */
public ImdbSpider addHttpRequestHeader(String key, String value) {
    this.manager.addHttpRequestHeader(key, value);
    return this;
}
/**
 * Searches movies by title, delegating to the {@code (title, maxCount)} overload
 * with a {@code maxCount} of {@code 0}.
 *
 * @param title movie title to search for
 * @return whatever the {@code (title, maxCount)} overload returns
 * @throws ImdbSpiderException propagated from the delegated call
 */
public MovieList searchMovieByTitle(String title) throws ImdbSpiderException {
    final int defaultMaxCount = 0;
    return searchMovieByTitle(title, defaultMaxCount);
}
@ -47,16 +51,16 @@ public class ImdbSpider {
return searchMovieByTitle(title, maxCount, EnumSet.of(MovieDataType.TITLE));
}
/**
 * Searches movies by title, requesting the data types given as varargs.
 * <p>
 * The varargs are collected into an {@link EnumSet} and the call is delegated to the
 * {@code EnumSet}-based overload.
 *
 * @param title     movie title to search for
 * @param maxCount  forwarded unchanged to the {@code EnumSet}-based overload
 * @param dataTypes data types to request; may be empty or {@code null}, in which case an
 *                  empty set is forwarded
 * @return whatever the {@code EnumSet}-based overload returns
 * @throws ImdbSpiderException propagated from the delegated call
 */
public MovieList searchMovieByTitle(String title, int maxCount, MovieDataType... dataTypes) throws ImdbSpiderException {
    // EnumSet.copyOf(Collection) throws IllegalArgumentException when handed an empty
    // non-EnumSet collection, so start from an empty set and add the requested types.
    EnumSet<MovieDataType> requested = EnumSet.noneOf(MovieDataType.class);
    if (dataTypes != null) {
        requested.addAll(Arrays.asList(dataTypes));
    }
    return searchMovieByTitle(title, maxCount, requested);
}
public MovieList searchMovieByTitle(String title, int maxCount, EnumSet<MovieDataType> dataTypes) throws ImdbSpiderException {
List<Task> tasks = apiFactory.createTasksForSearchMovieByTitle(title, maxCount, dataTypes);
LocalDateTime dateStart = LocalDateTime.now();
try {
manager.processTasks(tasks);
} catch (ExecutionException | InterruptedException e) {
throw new ImdbSpiderException("Error has been occurred!", e);
}
manager.processTasks(tasks);
LocalDateTime dateEnd = LocalDateTime.now();
Duration diff = Duration.between(dateStart, dateEnd);
System.out.println("TIME SPENT: "+(diff.toMillis())+" msec");
@ -69,4 +73,6 @@ public class ImdbSpider {
}
}

View File

@ -1,6 +1,10 @@
package ru.bvn13.imdbspider.imdb;
import ru.bvn13.imdbspider.imdb.accessories.Link;
import ru.bvn13.imdbspider.imdb.accessories.SoundMix;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
@ -12,6 +16,23 @@ public class Movie extends ImdbObject<MovieDataType> {
private String title;
private String originalTitle;
private Integer year;
private String posterLink;
private String storyline;
private String randomTagline;
private List<String> genres;
private String certificate;
private List<Link> officialSites;
private List<String> countries;
private List<String> languages;
private String releaseDate;
private String budget;
private String cumulativeWorldwideGross;
private String runtime;
private List<SoundMix> soundMixes;
private String color;
private String aspectRatio;
//private List<String> taglines;
private Map<String, String> akas = new ConcurrentHashMap<>(50);
@Override
@ -43,6 +64,126 @@ public class Movie extends ImdbObject<MovieDataType> {
this.year = year;
}
/** @return the poster link; {@code null} until one has been set */
public String getPosterLink() {
    return this.posterLink;
}

/** @param posterLink the poster link to store */
public void setPosterLink(final String posterLink) {
    this.posterLink = posterLink;
}

/** @return the storyline text; {@code null} until one has been set */
public String getStoryline() {
    return this.storyline;
}

/** @param storyline the storyline text to store */
public void setStoryline(final String storyline) {
    this.storyline = storyline;
}

/** @return the stored tagline; {@code null} until one has been set */
public String getRandomTagline() {
    return this.randomTagline;
}

/** @param randomTagline the tagline to store */
public void setRandomTagline(final String randomTagline) {
    this.randomTagline = randomTagline;
}

/** @return the stored genre list (the same instance that was set); {@code null} until set */
public List<String> getGenres() {
    return this.genres;
}

/** @param genres the genre list to store (kept by reference) */
public void setGenres(final List<String> genres) {
    this.genres = genres;
}

/** @return the stored certificate; {@code null} until one has been set */
public String getCertificate() {
    return this.certificate;
}

/** @param certificate the certificate to store */
public void setCertificate(final String certificate) {
    this.certificate = certificate;
}

/** @return the stored official-site links (the same instance that was set); {@code null} until set */
public List<Link> getOfficialSites() {
    return this.officialSites;
}

/** @param officialSites the official-site links to store (kept by reference) */
public void setOfficialSites(final List<Link> officialSites) {
    this.officialSites = officialSites;
}

/** @return the stored country list (the same instance that was set); {@code null} until set */
public List<String> getCountries() {
    return this.countries;
}

/** @param countries the country list to store (kept by reference) */
public void setCountries(final List<String> countries) {
    this.countries = countries;
}

/** @return the stored language list (the same instance that was set); {@code null} until set */
public List<String> getLanguages() {
    return this.languages;
}

/** @param languages the language list to store (kept by reference) */
public void setLanguages(final List<String> languages) {
    this.languages = languages;
}

/** @return the release date as text; {@code null} until one has been set */
public String getReleaseDate() {
    return this.releaseDate;
}

/** @param releaseDate the release date text to store */
public void setReleaseDate(final String releaseDate) {
    this.releaseDate = releaseDate;
}

/** @return the budget as text; {@code null} until one has been set */
public String getBudget() {
    return this.budget;
}

/** @param budget the budget text to store */
public void setBudget(final String budget) {
    this.budget = budget;
}

/** @return the cumulative worldwide gross as text; {@code null} until one has been set */
public String getCumulativeWorldwideGross() {
    return this.cumulativeWorldwideGross;
}

/** @param cumulativeWorldwideGross the cumulative worldwide gross text to store */
public void setCumulativeWorldwideGross(final String cumulativeWorldwideGross) {
    this.cumulativeWorldwideGross = cumulativeWorldwideGross;
}

/** @return the runtime as text; {@code null} until one has been set */
public String getRuntime() {
    return this.runtime;
}

/** @param runtime the runtime text to store */
public void setRuntime(final String runtime) {
    this.runtime = runtime;
}

/** @return the stored sound mixes (the same instance that was set); {@code null} until set */
public List<SoundMix> getSoundMixes() {
    return this.soundMixes;
}

/** @param soundMixes the sound mixes to store (kept by reference) */
public void setSoundMixes(final List<SoundMix> soundMixes) {
    this.soundMixes = soundMixes;
}

/** @return the stored color value; {@code null} until one has been set */
public String getColor() {
    return this.color;
}

/** @param color the color value to store */
public void setColor(final String color) {
    this.color = color;
}

/** @return the aspect ratio as text; {@code null} until one has been set */
public String getAspectRatio() {
    return this.aspectRatio;
}

/** @param aspectRatio the aspect ratio text to store */
public void setAspectRatio(final String aspectRatio) {
    this.aspectRatio = aspectRatio;
}

/** @return the live AKA map held by this movie (not a copy) */
public Map<String, String> getAkas() {
    return this.akas;
}

View File

@ -11,6 +11,22 @@ public enum MovieDataType implements DataType {
TITLE("title"),
ORIGINAL_TITLE("original_title"),
YEAR("year"),
POSTER("poster"),
STORYLINE("storyline"),
RANDOM_TAGLINE("random_tagline"),
GENRES("genres"),
CERTIFICATE("certificate"),
OFFICIAL_SITES("official_sites"),
COUNTRIES("countries"),
LANGUAGES("languages"),
RELEASE_DATE("release_date"),
BUDGET("budget"),
// Key uses the same snake_case form as the sibling constants (was space-separated).
CUMULATIVE_WORLDWIDE_GROSS("cumulative_worldwide_gross"),
RUNTIME("runtime"),
SOUND_MIXES("sound_mixes"),
COLOR("color"),
ASPECT_RATIO("aspect_ratio"),
TAGLINES("taglines"),
AKAS("akas")
;

View File

@ -0,0 +1,28 @@
package ru.bvn13.imdbspider.imdb.accessories;
/**
 * Mutable holder for a hyperlink: a URL plus the title it is shown with.
 * <p>
 * Both properties start out as {@code null}. Setters return the instance itself so
 * that a link can be built in a single chained expression.
 *
 * @author boyko_vn at 14.01.2019
 */
public class Link {

    private String url;
    private String title;

    /** @return the URL; {@code null} until one has been set */
    public String getUrl() {
        return this.url;
    }

    /**
     * Stores the URL.
     *
     * @param url the URL to store
     * @return this link, for chaining
     */
    public Link setUrl(final String url) {
        this.url = url;
        return this;
    }

    /** @return the title; {@code null} until one has been set */
    public String getTitle() {
        return this.title;
    }

    /**
     * Stores the title.
     *
     * @param title the title to store
     * @return this link, for chaining
     */
    public Link setTitle(final String title) {
        this.title = title;
        return this;
    }
}

View File

@ -0,0 +1,28 @@
package ru.bvn13.imdbspider.imdb.accessories;
/**
 * Mutable holder for one sound-mix entry: a name plus a free-text description.
 * <p>
 * Both properties start out as {@code null}. Setters return the instance itself so
 * that an entry can be built in a single chained expression.
 *
 * @author boyko_vn at 14.01.2019
 */
public class SoundMix {

    private String name;
    private String description;

    /** @return the name; {@code null} until one has been set */
    public String getName() {
        return this.name;
    }

    /**
     * Stores the name.
     *
     * @param name the name to store
     * @return this sound mix, for chaining
     */
    public SoundMix setName(final String name) {
        this.name = name;
        return this;
    }

    /** @return the description; {@code null} until one has been set */
    public String getDescription() {
        return this.description;
    }

    /**
     * Stores the description.
     *
     * @param description the description to store
     * @return this sound mix, for chaining
     */
    public SoundMix setDescription(final String description) {
        this.description = description;
        return this;
    }
}

View File

@ -18,10 +18,6 @@ public interface ApiFactory {
List<Task> createTasksForSearchMovieByTitle(String title, int maxCount, EnumSet<MovieDataType> dataTypes) throws ImdbSpiderException;
/**
 * Varargs convenience overload: collects the data types into an {@link EnumSet} and
 * delegates to the {@code EnumSet}-based method.
 *
 * @param title     movie title to search for
 * @param maxCount  forwarded unchanged to the {@code EnumSet}-based method
 * @param dataTypes data types to request; may be empty or {@code null}, in which case an
 *                  empty set is forwarded
 * @return whatever the {@code EnumSet}-based method returns
 * @throws ImdbSpiderException propagated from the delegated call
 */
default List<Task> createTasksForSearchMovieByTitle(String title, int maxCount, MovieDataType... dataTypes) throws ImdbSpiderException {
    // EnumSet.copyOf(Collection) throws IllegalArgumentException when handed an empty
    // non-EnumSet collection, so start from an empty set and add the requested types.
    EnumSet<MovieDataType> requested = EnumSet.noneOf(MovieDataType.class);
    if (dataTypes != null) {
        requested.addAll(Arrays.asList(dataTypes));
    }
    return createTasksForSearchMovieByTitle(title, maxCount, requested);
}
Task taskByDataType(DataType dataType) throws DataTypeNotSupportedException;
void fillUpImdbObject(ImdbObject imdbObject, Task task);

View File

@ -1,10 +1,15 @@
package ru.bvn13.imdbspider.spider.api.v1_0;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
import ru.bvn13.imdbspider.exceptions.api.DataTypeNotSupportedException;
import ru.bvn13.imdbspider.exceptions.processor.HtmlProcessorException;
import ru.bvn13.imdbspider.imdb.*;
import ru.bvn13.imdbspider.imdb.accessories.Link;
import ru.bvn13.imdbspider.imdb.accessories.SoundMix;
import ru.bvn13.imdbspider.spider.api.ApiFactory;
import ru.bvn13.imdbspider.spider.processor.HtmlProcessor;
import ru.bvn13.imdbspider.spider.tasker.Task;
import java.net.URLEncoder;
@ -12,15 +17,18 @@ import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import java.util.function.BiConsumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author boyko_vn at 09.01.2019
*
* IMDB :: SPIDER :: API :: version 1.0 (started 09.01.2019)
*/
public class ApiFactory_1_0 implements ApiFactory {
private static final String URL_MAIN = "https://www.imdb.com";
public static final String URL_MAIN = "https://www.imdb.com";
private static final String URL_SEARCH_TITLE = "https://www.imdb.com/find?ref_=nv_sr_fn&q={{title}}&s=tt";
@ -29,6 +37,90 @@ public class ApiFactory_1_0 implements ApiFactory {
private EnumSet<MovieDataType> defaultMovieDataTypeSet = EnumSet.of(MovieDataType.ID, MovieDataType.TITLE, MovieDataType.YEAR);
private EnumSet<MovieDataType> movieDataTypeSet;
private HtmlProcessor htmlProcessor;
/**
 * Reusable post-processing callbacks for tasks.
 * <p>
 * Each callback reads the task's CSS selector result, declares the result type on the
 * task and stores the extracted value as the task result. The second lambda argument
 * is not used by any of them.
 */
private static class POSTPROCESS {

    /** Trimmed {@code text()} of the first matched element, or {@code ""} when nothing matched. */
    static final BiConsumer<Task, String> GET_TEXT_OF_FIRST_ELEMENT =
            stringFromFirstMatch(element -> element.text().trim());

    /** Trimmed {@code ownText()} of the first matched element, or {@code ""} when nothing matched. */
    static final BiConsumer<Task, String> GET_OWN_TEXT_OF_FIRST_ELEMENT =
            stringFromFirstMatch(element -> element.ownText().trim());

    /** Trimmed {@code wholeText()} of the first matched element, or {@code ""} when nothing matched. */
    static final BiConsumer<Task, String> GET_WHOLE_TEXT_OF_FIRST_ELEMENT =
            stringFromFirstMatch(element -> element.wholeText().trim());

    /** Trimmed {@code ownText()} of the first match's parent, or {@code ""} when nothing matched. */
    static final BiConsumer<Task, String> GET_OWN_TEXT_OF_PARENT_MODE =
            stringFromFirstMatch(element -> element.parent().ownText().trim());

    /** Trimmed texts of every {@code a} element under the first match's parent; empty list when nothing matched. */
    static final BiConsumer<Task, String> COLLECT_TITLES_OF_ALL_NESTED_LINKS_OF_PARENT_NODE = (task, html) -> {
        task.setResultType(List.class);
        final List<String> linkTexts = new ArrayList<>();
        if (hasMatch(task)) {
            for (Element anchor : linksUnderParentOfFirstMatch(task)) {
                linkTexts.add(anchor.text().trim());
            }
        }
        task.setResult(linkTexts);
    };

    /**
     * {@link Link} objects for every {@code a} element under the first match's parent;
     * empty list when nothing matched. An {@code href} starting with {@code "/"} is
     * prefixed with {@code URL_MAIN}.
     */
    static final BiConsumer<Task, String> COLLECT_ALL_NESTED_LINKS_OF_PARENT_NODE = (task, html) -> {
        task.setResultType(List.class);
        final List<Link> collected = new ArrayList<>();
        if (hasMatch(task)) {
            for (Element anchor : linksUnderParentOfFirstMatch(task)) {
                final String href = anchor.attr("href").trim();
                final String target;
                if (href.startsWith("/")) {
                    target = String.format("%s%s", URL_MAIN, href);
                } else {
                    target = href;
                }
                collected.add(new Link().setTitle(anchor.text().trim()).setUrl(target));
            }
        }
        task.setResult(collected);
    };

    /** Trimmed text of the first {@code a} element under the first match's parent, or {@code ""} when there is none. */
    static final BiConsumer<Task, String> GET_TITLE_OF_FIRST_LINK_IN_PARENT_MODE = (task, html) -> {
        task.setResultType(String.class);
        task.setResult("");
        if (!hasMatch(task)) {
            return;
        }
        final Elements anchors = linksUnderParentOfFirstMatch(task);
        if (anchors.size() > 0) {
            task.setResult(anchors.first().text().trim());
        }
    };

    /** Tells whether the task's CSS selector produced at least one element. */
    private static boolean hasMatch(Task task) {
        return task.getCssSelectorResult().size() > 0;
    }

    /** Selects all {@code a} elements below the parent of the first matched element. */
    private static Elements linksUnderParentOfFirstMatch(Task task) {
        return task.getCssSelectorResult().first().parent().select("a");
    }

    /**
     * Builds a callback that stores a String result: the reader applied to the first
     * matched element, or {@code ""} when the selector matched nothing.
     */
    private static BiConsumer<Task, String> stringFromFirstMatch(java.util.function.Function<Element, String> reader) {
        return (task, html) -> {
            task.setResultType(String.class);
            if (hasMatch(task)) {
                task.setResult(reader.apply(task.getCssSelectorResult().first()));
            } else {
                task.setResult("");
            }
        };
    }
}
/**
 * Creates the factory with the HTML processor it keeps for later use by its
 * post-processing callbacks.
 *
 * @param htmlProcessor processor stored in this factory; not validated here
 */
public ApiFactory_1_0(final HtmlProcessor htmlProcessor) {
    this.htmlProcessor = htmlProcessor;
}
@Override
public List<Task> createTasksForSearchMovieByTitle(String title, int maxCount, EnumSet<MovieDataType> dataTypes) throws ImdbSpiderException {
@ -92,21 +184,11 @@ public class ApiFactory_1_0 implements ApiFactory {
break;
case TITLE:
t.setCssSelector("#title-overview-widget > div.vital > div.title_block > div > div.titleBar > div.title_wrapper > h1");
t.setPostprocess((task, s) -> {
task.setResultType(String.class);
task.setResult(task.getCssSelectorResult().first().wholeText().trim());
});
t.setPostprocess(POSTPROCESS.GET_WHOLE_TEXT_OF_FIRST_ELEMENT);
break;
case ORIGINAL_TITLE:
t.setCssSelector("#title-overview-widget > div.vital > div.title_block > div > div.titleBar > div.title_wrapper > div.originalTitle");
t.setPostprocess((task, s) -> {
task.setResultType(String.class);
if (task.getCssSelectorResult().size() > 0) {
task.setResult(task.getCssSelectorResult().first().ownText());
} else {
task.setResult("");
}
});
t.setPostprocess(POSTPROCESS.GET_OWN_TEXT_OF_FIRST_ELEMENT);
break;
case YEAR:
t.setCssSelector("#titleYear > a");
@ -123,6 +205,115 @@ public class ApiFactory_1_0 implements ApiFactory {
}
});
break;
case POSTER:
t.setCssSelector("#title-overview-widget > div.vital > div.slate_wrapper > div.poster > a > img");
t.setPostprocess((task, s) -> {
task.setResultType(String.class);
if (task.getCssSelectorResult().size() > 0) {
task.setResult(task.getCssSelectorResult().first().attr("src"));
} else {
task.setResult("");
}
});
break;
case STORYLINE:
t.setCssSelector("#titleStoryLine > div:nth-child(3) > p > span");
t.setPostprocess(POSTPROCESS.GET_TEXT_OF_FIRST_ELEMENT);
break;
case RANDOM_TAGLINE:
t.setCssSelector("#titleStoryLine > div > h4:contains(Taglines)"); //#titleStoryLine > div:nth-child(8) > h4
t.setPostprocess(POSTPROCESS.GET_OWN_TEXT_OF_PARENT_MODE);
break;
case GENRES:
t.setCssSelector("#titleStoryLine > div > h4:contains(Genres)");
t.setPostprocess(POSTPROCESS.COLLECT_TITLES_OF_ALL_NESTED_LINKS_OF_PARENT_NODE);
break;
case CERTIFICATE:
    // Locate the "Certificate" heading; the value is read from the second-child span
    // of the heading's parent element.
    t.setCssSelector("#titleStoryLine > div > h4:contains(Certificate)");
    t.setPostprocess((task, s) -> {
        task.setResultType(String.class);
        // Default to "" like the other string extractors, so the result is always set
        // even when the heading or its value span is missing.
        task.setResult("");
        if (task.getCssSelectorResult().size() > 0) {
            // first() yields null when the inner selector matches nothing; guard it
            // instead of dereferencing blindly.
            Element valueSpan = task.getCssSelectorResult().first().parent().select("span:nth-child(2)").first();
            if (valueSpan != null) {
                task.setResult(valueSpan.text().trim());
            }
        }
    });
    break;
case OFFICIAL_SITES:
t.setCssSelector("#titleDetails > div > h4:contains(Official Sites)");
t.setPostprocess(POSTPROCESS.COLLECT_ALL_NESTED_LINKS_OF_PARENT_NODE);
break;
case COUNTRIES:
t.setCssSelector("#titleDetails > div > h4:contains(Country)");
t.setPostprocess(POSTPROCESS.COLLECT_TITLES_OF_ALL_NESTED_LINKS_OF_PARENT_NODE);
break;
case LANGUAGES:
t.setCssSelector("#titleDetails > div > h4:contains(Language)");
t.setPostprocess(POSTPROCESS.COLLECT_TITLES_OF_ALL_NESTED_LINKS_OF_PARENT_NODE);
break;
case RELEASE_DATE:
t.setCssSelector("#titleDetails > div > h4:contains(Release Date)");
t.setPostprocess(POSTPROCESS.GET_OWN_TEXT_OF_PARENT_MODE);
break;
case BUDGET:
t.setCssSelector("#titleDetails > div > h4:contains(Budget)");
t.setPostprocess(POSTPROCESS.GET_OWN_TEXT_OF_PARENT_MODE);
break;
case CUMULATIVE_WORLDWIDE_GROSS:
t.setCssSelector("#titleDetails > div > h4:contains(Cumulative Worldwide Gross)");
t.setPostprocess(POSTPROCESS.GET_OWN_TEXT_OF_PARENT_MODE);
break;
case RUNTIME:
t.setCssSelector("#titleDetails > div > h4:contains(Runtime)");
t.setPostprocess((task, s) -> {
task.setResultType(String.class);
task.setResult("");
if (task.getCssSelectorResult().size() > 0) {
task.setResult(task.getCssSelectorResult().first().parent().text().replace("Runtime:", "").trim());
}
});
break;
case SOUND_MIXES:
    // Locate the "Sound Mix" heading; entries live in the heading's parent element,
    // separated by ghost "|" spans. Each entry is a link (the name) followed by
    // plain text (the description).
    t.setCssSelector("#titleDetails > div > h4:contains(Sound Mix)");
    t.setPostprocess((task, s) -> {
        task.setResultType(List.class);
        List<SoundMix> soundMixes = new ArrayList<>();
        if (task.getCssSelectorResult().size() > 0) {
            String html = task.getCssSelectorResult().first().parent().html();
            html = html.replace("\r", "");
            html = html.replace("\n", "");
            // turn the visual separators into a plain delimiter we can split on
            html = html.replace("<span class=\"ghost\">|</span>", "|");
            // remove header: <h4 class="inline">Sound Mix:</h4>
            html = html.replaceAll("(<h4.+\\/h4>)", "");
            for (String fragment : html.split("\\|")) {
                try {
                    Elements els = htmlProcessor.process(String.format("<div>%s</div>", fragment), "div");
                    if (els.size() > 0) {
                        Element div = els.first();
                        Element link = div.selectFirst("a");
                        if (link == null) {
                            // A fragment without a link has no name to report; skip it
                            // rather than let a NullPointerException abort the whole task.
                            continue;
                        }
                        soundMixes.add(new SoundMix()
                                .setName(link.text().trim())
                                .setDescription(div.ownText())
                        );
                    }
                } catch (HtmlProcessorException e) {
                    // best effort: a fragment that cannot be parsed is reported and skipped
                    e.printStackTrace();
                }
            }
        }
        task.setResult(soundMixes);
    });
    break;
case COLOR:
t.setCssSelector("#titleDetails > div > h4:contains(Color)");
t.setPostprocess(POSTPROCESS.GET_TITLE_OF_FIRST_LINK_IN_PARENT_MODE);
break;
case ASPECT_RATIO:
t.setCssSelector("#titleDetails > div > h4:contains(Aspect Ratio)");
t.setPostprocess(POSTPROCESS.GET_OWN_TEXT_OF_PARENT_MODE);
break;
}
return t;
}
@ -184,6 +375,66 @@ public class ApiFactory_1_0 implements ApiFactory {
movie.setYear((Integer) task.getResult());
isDone = true;
break;
case POSTER:
movie.setPosterLink((String) task.getResult());
isDone = true;
break;
case STORYLINE:
movie.setStoryline((String) task.getResult());
isDone = true;
break;
case RANDOM_TAGLINE:
movie.setRandomTagline((String) task.getResult());
isDone = true;
break;
case GENRES:
movie.setGenres((List<String>) task.getResult());
isDone = true;
break;
case CERTIFICATE:
movie.setCertificate((String) task.getResult());
isDone = true;
break;
case OFFICIAL_SITES:
movie.setOfficialSites((List<Link>) task.getResult());
isDone = true;
break;
case COUNTRIES:
movie.setCountries((List<String>) task.getResult());
isDone = true;
break;
case LANGUAGES:
movie.setLanguages((List<String>) task.getResult());
isDone = true;
break;
case RELEASE_DATE:
movie.setReleaseDate((String) task.getResult());
isDone = true;
break;
case BUDGET:
movie.setBudget((String) task.getResult());
isDone = true;
break;
case CUMULATIVE_WORLDWIDE_GROSS:
movie.setCumulativeWorldwideGross((String) task.getResult());
isDone = true;
break;
case RUNTIME:
movie.setRuntime((String) task.getResult());
isDone = true;
break;
case SOUND_MIXES:
movie.setSoundMixes((List<SoundMix>) task.getResult());
isDone = true;
break;
case COLOR:
movie.setColor((String) task.getResult());
isDone = true;
break;
case ASPECT_RATIO:
movie.setAspectRatio((String) task.getResult());
isDone = true;
break;
}
if (isDone) {

View File

@ -34,7 +34,7 @@ public class HtmlExtractor {
: resultString;
}
public String getHtml(String url) throws HtmlExtractorException {
public String getHtml(String url, Map<String, String> headers) throws HtmlExtractorException {
URL obj = null;
@ -52,6 +52,9 @@ public class HtmlExtractor {
}
connection.setRequestProperty("Accept", "text/html");
for (Map.Entry<String, String> header : headers.entrySet()) {
connection.setRequestProperty(header.getKey(), header.getValue());
}
try {
connection.setRequestMethod("GET");

View File

@ -3,6 +3,7 @@ package ru.bvn13.imdbspider.spider.tasker;
import ru.bvn13.imdbspider.exceptions.extractor.HtmlExtractorException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.*;
@ -12,14 +13,13 @@ import java.util.concurrent.*;
*/
public class Manager {
private ExecutorService executor;
private Map<String, String> httpRequestHeaders = new HashMap<>();
public Manager() {
this.executor = Executors.newCachedThreadPool();
public void addHttpRequestHeader(String key, String value) {
this.httpRequestHeaders.put(key, value);
}
public void processTasks(List<Task> allTasks) throws ExecutionException, InterruptedException {
public void processTasks(List<Task> allTasks) {
Map<String, List<Task>> groupedTasks = new ConcurrentHashMap<>(allTasks.size());
@ -39,7 +39,7 @@ public class Manager {
groupedTasks.entrySet().parallelStream().forEach(stringListEntry -> {
Worker w = new Worker(stringListEntry.getKey(), stringListEntry.getValue());
try {
w.run();
w.run(httpRequestHeaders);
} catch (HtmlExtractorException e) {
e.printStackTrace();
}

View File

@ -7,7 +7,9 @@ import ru.bvn13.imdbspider.spider.extractor.HtmlExtractor;
import ru.bvn13.imdbspider.spider.processor.HtmlProcessor;
import ru.bvn13.imdbspider.spider.processor.JsoupHtmlProcessor;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.*;
/**
@ -21,22 +23,17 @@ public class Worker {
private final HtmlExtractor htmlExtractor;
private final HtmlProcessor htmlProcessor;
private final ExecutorService executor;
public Worker(String url, List<Task> tasks) {
this.url = url;
this.tasks = tasks;
this.htmlExtractor = new HtmlExtractor();
this.htmlProcessor = new JsoupHtmlProcessor();
this.executor = Executors.newCachedThreadPool();
}
public Boolean run(Map<String, String> httpRequestHeaders) throws HtmlExtractorException {
public Boolean run() throws HtmlExtractorException {
final String html = htmlExtractor.getHtml(url);
final String html = htmlExtractor.getHtml(url, httpRequestHeaders);
tasks.parallelStream().forEach(task -> {

View File

@ -10,26 +10,123 @@ import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
import ru.bvn13.imdbspider.imdb.Movie;
import ru.bvn13.imdbspider.imdb.MovieDataType;
import ru.bvn13.imdbspider.imdb.MovieList;
import java.util.EnumSet;
import ru.bvn13.imdbspider.imdb.accessories.SoundMix;
public class MovieSearchTest
{
public class MovieSearchTest {
private static final String TERMINATOR_STORYLINE = "A cyborg is sent from the future on a deadly mission. He has to kill Sarah Connor, a young woman whose life will have a great significance in years to come. Sarah has only one protector - Kyle Reese - also sent from the future. The Terminator uses his exceptional intelligence and strength to find Sarah, but is there any way to stop the seemingly indestructible cyborg ?";
private static final String TERMINATOR_POSTER_LINK = "https://m.media-amazon.com/images/M/MV5BYTViNzMxZjEtZGEwNy00MDNiLWIzNGQtZDY2MjQ1OWViZjFmXkEyXkFqcGdeQXVyNzkwMjQ5NzM@._V1_UX182_CR0,0,182,268_AL_.jpg";
private static ImdbSpider spider;
@BeforeClass
public static void initClass() {
spider = ImdbSpider.withApi_1_0();
spider = ImdbSpider.withApi_1_0()
.addHttpRequestHeader("Content-Language", "ru-RU");
}
@Test
public void testSearchTerminator() throws ImdbSpiderException {
MovieList result = spider.searchMovieByTitle("Терминатор", 5, EnumSet.of(MovieDataType.ID, MovieDataType.TITLE, MovieDataType.ORIGINAL_TITLE, MovieDataType.YEAR));
MovieList result = spider.searchMovieByTitle("Терминатор", 5,
MovieDataType.ID,
MovieDataType.TITLE,
MovieDataType.ORIGINAL_TITLE,
MovieDataType.YEAR,
MovieDataType.STORYLINE,
MovieDataType.RANDOM_TAGLINE,
MovieDataType.GENRES,
MovieDataType.CERTIFICATE,
MovieDataType.OFFICIAL_SITES,
MovieDataType.COUNTRIES,
MovieDataType.LANGUAGES,
MovieDataType.RELEASE_DATE,
MovieDataType.BUDGET,
MovieDataType.CUMULATIVE_WORLDWIDE_GROSS,
MovieDataType.RUNTIME,
MovieDataType.SOUND_MIXES,
MovieDataType.COLOR,
MovieDataType.ASPECT_RATIO,
MovieDataType.POSTER
);
assertTrue(result.getMovies().size() > 0);
Movie movie = result.getMovies().get(0);
assertTrue(movie.isDataTypeRetrieved(MovieDataType.ID));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.TITLE));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.ORIGINAL_TITLE));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.YEAR));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.STORYLINE));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.RANDOM_TAGLINE));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.GENRES));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.CERTIFICATE));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.OFFICIAL_SITES));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.COUNTRIES));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.LANGUAGES));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.RELEASE_DATE));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.BUDGET));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.CUMULATIVE_WORLDWIDE_GROSS));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.RUNTIME));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.SOUND_MIXES));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.COLOR));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.ASPECT_RATIO));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.POSTER));
assertEquals("0088247", movie.getId());
assertEquals("The Terminator", movie.getOriginalTitle());
assertEquals(Integer.valueOf(1984), movie.getYear());
assertEquals(TERMINATOR_STORYLINE, movie.getStoryline());
//assertEquals(TERMINATOR_TAGLINES, movie.getRandomTagline());
assertTrue(movie.getGenres().contains("Action"));
assertTrue(movie.getGenres().contains("Sci-Fi"));
assertEquals("16+", movie.getCertificate());
//assertTrue(movie.getOfficialSites().contains("Facebook"));
assertTrue(movie.getOfficialSites().size() > 0);
assertEquals("Facebook", movie.getOfficialSites().get(0).getTitle());
//assertEquals(TERMINATOR_FACEBOOK_URL, movie.getOfficialSites().get(0).getUrl()); //not comparable, dynamic link
assertTrue(movie.getCountries().size() > 0);
assertTrue(movie.getCountries().contains("UK"));
assertTrue(movie.getCountries().contains("USA"));
assertTrue(movie.getLanguages().size() > 0);
assertTrue(movie.getLanguages().contains("English"));
assertTrue(movie.getLanguages().contains("Spanish"));
assertEquals("26 October 1984 (USA)", movie.getReleaseDate());
assertEquals("$6,400,000", movie.getBudget());
assertEquals("$40,000,000", movie.getCumulativeWorldwideGross());
assertEquals("107 min", movie.getRuntime());
//sound mixes
assertTrue(movie.getSoundMixes().size() > 0);
boolean hasMono=false, hasDolby=false, hasDTS=false;
String descrMono="", descrDolby="", descrDTS="";
for (SoundMix soundMix : movie.getSoundMixes()) {
switch (soundMix.getName()) {
case "Mono" : hasMono = true; descrMono = soundMix.getDescription(); break;
case "Dolby" : hasDolby = true; descrDolby = soundMix.getDescription(); break;
case "DTS" : hasDTS = true; descrDTS = soundMix.getDescription(); break;
}
}
assertTrue(hasMono);
assertTrue(hasDolby);
assertTrue(hasDTS);
assertEquals("(original release)", descrMono);
assertEquals("(DVD Re-Release)", descrDolby);
assertEquals("(DTS HD Master Audio)", descrDTS);
assertEquals("Color", movie.getColor());
assertEquals("1.85 : 1", movie.getAspectRatio());
assertEquals(TERMINATOR_POSTER_LINK, movie.getPosterLink());
}
}