implemented collecting taglines

master
Vyacheslav Boyko 2019-01-15 15:09:56 +03:00
parent 9f118edbba
commit 19691b4d09
13 changed files with 402 additions and 22 deletions

View File

@ -32,7 +32,7 @@ public class Movie extends ImdbObject<MovieDataType> {
private String color;
private String aspectRatio;
//private List<String> taglines;
private TaglineList taglineList;
private Map<String, String> akas = new ConcurrentHashMap<>(50);
@Override
@ -184,6 +184,14 @@ public class Movie extends ImdbObject<MovieDataType> {
this.aspectRatio = aspectRatio;
}
/**
 * Returns the tagline list currently attached to this movie.
 *
 * @return the stored tagline list, or {@code null} if none has been set
 */
public TaglineList getTaglineList() {
    return this.taglineList;
}
/**
 * Replaces the tagline list attached to this movie.
 *
 * @param taglineList the new tagline list; stored as given, without copying
 */
public void setTaglineList(TaglineList taglineList) {
    this.taglineList = taglineList;
}
/**
 * Returns the map held in the {@code akas} field.
 *
 * @return the internal map itself (not a copy), so changes made by the caller are visible here
 */
public Map<String, String> getAkas() {
    return this.akas;
}

View File

@ -0,0 +1,25 @@
package ru.bvn13.imdbspider.imdb;
import java.util.EnumSet;
/**
 * A single tagline: an {@link ImdbObject} that additionally carries the tagline text.
 *
 * @author boyko_vn at 15.01.2019
 */
public class Tagline extends ImdbObject<TaglineDataType> {

    /** Text of the tagline; {@code null} until {@link #setText(String)} is called. */
    private String text;

    /**
     * Returns the tagline text.
     *
     * @return the stored text, or {@code null} if it has not been set
     */
    public String getText() {
        return this.text;
    }

    /**
     * Stores the tagline text.
     *
     * @param text the text to keep; stored as given
     */
    public void setText(String text) {
        this.text = text;
    }

    /**
     * Starts the set of retrieved data types out empty.
     */
    @Override
    protected void initRetrievedDataTypes() {
        retrievedDataTypes = EnumSet.noneOf(TaglineDataType.class);
    }
}

View File

@ -0,0 +1,26 @@
package ru.bvn13.imdbspider.imdb;
import java.util.EnumSet;
/**
 * Data types that can be retrieved for a single tagline.
 *
 * @author boyko_vn at 15.01.2019
 */
public enum TaglineDataType implements DataType {
    /** The tagline's identifier. */
    ID("id"),
    /** The tagline's text. */
    TEXT("text");

    /**
     * Set containing every constant of this enum.
     * Note that {@link EnumSet} is mutable; callers should treat this set as read-only.
     */
    public static final EnumSet<TaglineDataType> ALL_DATA = EnumSet.allOf(TaglineDataType.class);

    /** String key supplied to the constructor. */
    private final String value;

    TaglineDataType(String v) {
        this.value = v;
    }

    /**
     * Returns the string key of this constant.
     *
     * @return the value passed to the constructor (for example {@code "id"} or {@code "text"})
     */
    @Override
    public String get() {
        // Previously returned null, unlike TaglineListDataType.get(), which returns its value.
        return value;
    }
}

View File

@ -0,0 +1,29 @@
package ru.bvn13.imdbspider.imdb;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
/**
 * An {@link ImdbObject} holding a list of {@link Tagline} entries.
 *
 * @author boyko_vn at 15.01.2019
 */
public class TaglineList extends ImdbObject<TaglineListDataType> {

    /** Backing list; created on first access through {@link #getTaglines()}. */
    private List<Tagline> taglines;

    /**
     * Returns the taglines, creating an empty mutable list first if none is present.
     *
     * @return the internal list itself, never {@code null}
     */
    public List<Tagline> getTaglines() {
        if (this.taglines != null) {
            return this.taglines;
        }
        this.taglines = new ArrayList<>();
        return this.taglines;
    }

    /**
     * Replaces the backing list.
     *
     * @param taglines the new list; may be {@code null}, in which case the next
     *                 {@link #getTaglines()} call creates a fresh empty list
     */
    public void setTaglines(List<Tagline> taglines) {
        this.taglines = taglines;
    }

    /**
     * Starts the set of retrieved data types out empty.
     */
    @Override
    protected void initRetrievedDataTypes() {
        retrievedDataTypes = EnumSet.noneOf(TaglineListDataType.class);
    }
}

View File

@ -0,0 +1,21 @@
package ru.bvn13.imdbspider.imdb;
/**
 * Data types that can be retrieved for a list of taglines.
 *
 * @author boyko_vn at 15.01.2019
 */
public enum TaglineListDataType implements DataType {
    /** The entries of the tagline list. */
    ELEMENTS("elements");

    /** String key supplied to the constructor. */
    private final String value;

    TaglineListDataType(String key) {
        this.value = key;
    }

    /**
     * Returns the string key of this constant.
     *
     * @return the value passed to the constructor
     */
    @Override
    public String get() {
        return this.value;
    }
}

View File

@ -17,6 +17,7 @@ import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiConsumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -151,6 +152,10 @@ public class ApiFactory_1_0 implements ApiFactory {
return taskByMovieDataType((MovieDataType) dataType);
} else if (dataType instanceof MovieListDataType) {
return taskByMovieListDataType((MovieListDataType) dataType);
} else if (dataType instanceof TaglineListDataType) {
return taskByTaglineListDataType((TaglineListDataType) dataType);
} else if (dataType instanceof TaglineDataType) {
return taskByTaglineDataType((TaglineDataType) dataType);
} else {
throw new DataTypeNotSupportedException(String.format("DataType %s is not supported by API v1_0!", dataType.getClass().getName()));
}
@ -166,6 +171,14 @@ public class ApiFactory_1_0 implements ApiFactory {
if (task.getDataType() instanceof MovieListDataType) {
fillUpMovieList((MovieList) imdbObject, task);
}
} else if (imdbObject instanceof TaglineList) {
if (task.getDataType() instanceof TaglineListDataType) {
fillUpTaglineList((TaglineList) imdbObject, task);
}
} else if (imdbObject instanceof Tagline) {
if (task.getDataType() instanceof TaglineDataType) {
fillUpTagline((Tagline) imdbObject, task);
}
}
}
@ -314,6 +327,63 @@ public class ApiFactory_1_0 implements ApiFactory {
t.setCssSelector("#titleDetails > div > h4:contains(Aspect Ratio)");
t.setPostprocess(POSTPROCESS.GET_OWN_TEXT_OF_PARENT_MODE);
break;
case TAGLINES:
t.setCssSelector("#titleStoryLine > div > h4:contains(Taglines)");
t.setPostprocess((task, s) -> {
if (task.getCssSelectorResult().size() > 0) {
Elements links = task.getCssSelectorResult().first().parent().select("span > a:contains(See more)");
if (links.size() > 0) {
Task newTask = this.taskByTaglineListDataType(TaglineListDataType.ELEMENTS)
.setParentTask(task)
.setUrl(String.format("%s%s", URL_MAIN, links.first().attr("href")));
task.getNestedTasks().add(newTask);
}
}
});
break;
}
return t;
}
/**
 * Creates the task for a tagline-list data type.
 * <p>
 * For {@code ELEMENTS} the task selects {@code #taglines_content > div.soda} and, in its
 * post-processing step, turns every selected element into a pair of tasks: an {@code ID}
 * task whose result is a running number (starting at 0) and a {@code TEXT} task whose
 * result is the element's text. The {@code TEXT} task is nested under the {@code ID}
 * task, and the {@code ID} task is added to the list task's nested tasks.
 *
 * @param taglineListDataType the data type to build a task for
 * @return the configured task
 */
private Task taskByTaglineListDataType(TaglineListDataType taglineListDataType) {
    Task result = new Task();
    result.setDataType(taglineListDataType);
    switch (taglineListDataType) {
        case ELEMENTS:
            result.setCssSelector("#taglines_content > div.soda");
            // Created once per task, outside the callback, so numbering continues
            // if the post-processing step runs more than once.
            final AtomicInteger counter = new AtomicInteger(0);
            result.setPostprocess((owner, ignored) -> {
                for (Element row : owner.getCssSelectorResult()) {
                    Task idTask = taskByTaglineDataType(TaglineDataType.ID)
                            .setParentTask(owner)
                            .setUrl(owner.getUrl())
                            .setResult(String.format("%d", counter.getAndIncrement()));
                    owner.getNestedTasks().add(idTask);

                    Task textTask = taskByTaglineDataType(TaglineDataType.TEXT)
                            .setParentTask(owner)
                            .setUrl(owner.getUrl())
                            .setResult(row.text());
                    idTask.getNestedTasks().add(textTask);
                }
            });
            break;
    }
    return result;
}
/**
 * Creates the task for a single-tagline data type.
 * <p>
 * {@code ID} tasks get no selector and no post-processing. {@code TEXT} tasks get a
 * post-processing step that trims the task's current result.
 *
 * @param taglineDataType the data type to build a task for
 * @return the configured task
 */
private Task taskByTaglineDataType(TaglineDataType taglineDataType) {
    Task t = new Task();
    t.setDataType(taglineDataType);
    switch (taglineDataType) {
        case ID:
            // Nothing to configure: no selector or post-processing is set for ID.
            break;
        case TEXT:
            t.setPostprocess((task, s) -> {
                // Skip trimming when no result has been set; the unconditional
                // trim() used to fail with a NullPointerException in that case.
                Object raw = task.getResult();
                if (raw != null) {
                    task.setResult(((String) raw).trim());
                }
            });
            break;
    }
    return t;
}
@ -435,6 +505,8 @@ public class ApiFactory_1_0 implements ApiFactory {
movie.setAspectRatio((String) task.getResult());
isDone = true;
break;
case TAGLINES:
isDone = true;
}
if (isDone) {
@ -442,6 +514,30 @@ public class ApiFactory_1_0 implements ApiFactory {
}
}
/**
 * Copies the outcome of a tagline-list task into the given tagline list.
 * <p>
 * For {@code ELEMENTS} the task URL is stored on the list and the data type is
 * recorded as retrieved.
 *
 * @param taglineList the object to fill
 * @param task        the task whose data type is a {@link TaglineListDataType}
 */
private void fillUpTaglineList(TaglineList taglineList, Task task) {
    final TaglineListDataType kind = (TaglineListDataType) task.getDataType();
    switch (kind) {
        case ELEMENTS:
            taglineList.setUrl(task.getUrl());
            taglineList.getRetrievedDataTypes().add(kind);
            break;
        default:
            break;
    }
}
/**
 * Copies the outcome of a tagline task into the given tagline.
 * <p>
 * Both cases store the task URL and record the data type as retrieved; {@code ID}
 * additionally stores the task result as the id, {@code TEXT} stores it as the text.
 *
 * @param tagline the object to fill
 * @param task    the task whose data type is a {@link TaglineDataType}
 */
private void fillUpTagline(Tagline tagline, Task task) {
    final TaglineDataType kind = (TaglineDataType) task.getDataType();
    switch (kind) {
        case ID:
            tagline.setUrl(task.getUrl());
            tagline.setId((String) task.getResult());
            break;
        case TEXT:
            tagline.setUrl(task.getUrl());
            tagline.setText((String) task.getResult());
            break;
        default:
            // No other constants are handled; nothing is recorded for them.
            return;
    }
    tagline.getRetrievedDataTypes().add(kind);
}
private void fillUpMovieList(MovieList movieList, Task task) {
switch ((MovieListDataType) task.getDataType()) {
case ELEMENTS:

View File

@ -0,0 +1,20 @@
package ru.bvn13.imdbspider.spider.composer;
import ru.bvn13.imdbspider.exceptions.composer.ComposerNotFoundException;
import ru.bvn13.imdbspider.spider.api.ApiFactory;
/**
 * Base class for composers: keeps the {@link ApiFactory} and the
 * {@link ImdbObjectComposerFactory} that subclasses work with.
 *
 * @author boyko_vn at 15.01.2019
 */
public abstract class AbstractImdbObjectComposer {

    /** Factory passed in at construction time. */
    protected ApiFactory apiFactory;

    /** Composer factory passed in at construction time. */
    protected ImdbObjectComposerFactory imdbObjectComposerFactory;

    /**
     * Stores both collaborators for later use by subclasses.
     *
     * @param apiFactory                the API factory to keep
     * @param imdbObjectComposerFactory the composer factory to keep
     * @throws ComposerNotFoundException declared for subclasses; this constructor itself
     *                                   only assigns fields
     */
    public AbstractImdbObjectComposer(ApiFactory apiFactory, ImdbObjectComposerFactory imdbObjectComposerFactory) throws ComposerNotFoundException {
        this.imdbObjectComposerFactory = imdbObjectComposerFactory;
        this.apiFactory = apiFactory;
    }
}

View File

@ -1,8 +1,7 @@
package ru.bvn13.imdbspider.spider.composer;
import ru.bvn13.imdbspider.exceptions.composer.ComposerNotFoundException;
import ru.bvn13.imdbspider.imdb.ImdbObject;
import ru.bvn13.imdbspider.imdb.MovieList;
import ru.bvn13.imdbspider.imdb.*;
import ru.bvn13.imdbspider.spider.api.ApiFactory;
/**
@ -17,16 +16,34 @@ public class ImdbObjectComposerFactory {
}
private MovieListComposer movieListComposer;
private MovieComposer movieComposer;
private TaglineListComposer taglineListComposer;
private TaglineComposer taglineComposer;
public <C extends ImdbObject> ImdbObjectComposer getComposer(Class<C> clazz) throws ComposerNotFoundException {
if (clazz.isAssignableFrom(MovieList.class)) {
if (movieListComposer == null) {
movieListComposer = new MovieListComposer(apiFactory);
return movieListComposer;
movieListComposer = new MovieListComposer(apiFactory, this);
}
return movieListComposer;
} if (clazz.isAssignableFrom(Movie.class)) {
if (movieComposer == null) {
movieComposer = new MovieComposer(apiFactory, this);
}
return movieComposer;
} if (clazz.isAssignableFrom(TaglineList.class)) {
if (taglineListComposer == null) {
taglineListComposer = new TaglineListComposer(apiFactory, this);
}
return taglineListComposer;
} if (clazz.isAssignableFrom(Tagline.class)) {
if (taglineComposer == null) {
taglineComposer = new TaglineComposer(apiFactory, this);
}
return taglineComposer;
}
throw new ComposerNotFoundException(String.format("Composer not found: %s", clazz.getClass().getName()));
throw new ComposerNotFoundException(String.format("Composer not found: %s", clazz.getName()));
}
}

View File

@ -0,0 +1,36 @@
package ru.bvn13.imdbspider.spider.composer;
import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
import ru.bvn13.imdbspider.exceptions.composer.ComposerNotFoundException;
import ru.bvn13.imdbspider.imdb.Movie;
import ru.bvn13.imdbspider.imdb.MovieDataType;
import ru.bvn13.imdbspider.imdb.TaglineList;
import ru.bvn13.imdbspider.spider.api.ApiFactory;
import ru.bvn13.imdbspider.spider.tasker.Task;
/**
 * Composes a {@link Movie} from a task and its nested tasks.
 *
 * @author boyko_vn at 15.01.2019
 */
public class MovieComposer extends AbstractImdbObjectComposer implements ImdbObjectComposer<Movie> {

    /** Composer used for the nested tagline list; resolved once in the constructor. */
    private final TaglineListComposer taglineListComposer;

    /**
     * Creates the composer and looks up the tagline-list composer from the factory.
     *
     * @param apiFactory                factory used to fill objects from tasks
     * @param imdbObjectComposerFactory factory used to obtain the tagline-list composer
     * @throws ComposerNotFoundException if the factory has no composer for {@link TaglineList}
     */
    public MovieComposer(ApiFactory apiFactory, ImdbObjectComposerFactory imdbObjectComposerFactory) throws ComposerNotFoundException {
        super(apiFactory, imdbObjectComposerFactory);
        this.taglineListComposer = (TaglineListComposer) this.imdbObjectComposerFactory.getComposer(TaglineList.class);
    }

    /**
     * Builds a movie by filling it from {@code task} and then from each nested task.
     * A nested task of type {@link MovieDataType#TAGLINES} is additionally composed
     * into the movie's tagline list.
     *
     * @param task the movie task
     * @return the populated movie
     * @throws ImdbSpiderException declared by the composer contract
     */
    @Override
    public Movie compose(Task task) throws ImdbSpiderException {
        Movie movie = new Movie();
        apiFactory.fillUpImdbObject(movie, task);
        for (Task nestedTask : task.getNestedTasks()) {
            apiFactory.fillUpImdbObject(movie, nestedTask);
            // Identity comparison is null-safe, whereas calling equals() on a
            // missing data type would throw a NullPointerException.
            if (nestedTask.getDataType() == MovieDataType.TAGLINES) {
                movie.setTaglineList(taglineListComposer.compose(nestedTask));
            }
        }
        return movie;
    }
}

View File

@ -1,5 +1,7 @@
package ru.bvn13.imdbspider.spider.composer;
import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
import ru.bvn13.imdbspider.exceptions.composer.ComposerNotFoundException;
import ru.bvn13.imdbspider.imdb.Movie;
import ru.bvn13.imdbspider.imdb.MovieList;
import ru.bvn13.imdbspider.spider.api.ApiFactory;
@ -8,12 +10,14 @@ import ru.bvn13.imdbspider.spider.tasker.Task;
/**
* @author boyko_vn at 10.01.2019
*/
public class MovieListComposer implements ImdbObjectComposer<MovieList> {
public class MovieListComposer extends AbstractImdbObjectComposer implements ImdbObjectComposer<MovieList> {
private ApiFactory apiFactory;
private MovieComposer movieComposer;
public MovieListComposer(ApiFactory apiFactory) {
this.apiFactory = apiFactory;
public MovieListComposer(ApiFactory apiFactory, ImdbObjectComposerFactory imdbObjectComposerFactory) throws ComposerNotFoundException {
super(apiFactory, imdbObjectComposerFactory);
this.movieComposer = (MovieComposer) this.imdbObjectComposerFactory.getComposer(Movie.class);
}
@Override
@ -22,11 +26,10 @@ public class MovieListComposer implements ImdbObjectComposer<MovieList> {
apiFactory.fillUpImdbObject(movieList, task);
for (Task movieTask : task.getNestedTasks()) {
Movie movie = new Movie();
movieList.getMovies().add(movie);
apiFactory.fillUpImdbObject(movie, movieTask);
for (Task nestedTask : movieTask.getNestedTasks()) {
apiFactory.fillUpImdbObject(movie, nestedTask);
try {
movieList.getMovies().add(this.movieComposer.compose(movieTask));
} catch (ImdbSpiderException e) {
e.printStackTrace();
}
}
return movieList;

View File

@ -0,0 +1,28 @@
package ru.bvn13.imdbspider.spider.composer;
import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
import ru.bvn13.imdbspider.exceptions.composer.ComposerNotFoundException;
import ru.bvn13.imdbspider.imdb.Tagline;
import ru.bvn13.imdbspider.spider.api.ApiFactory;
import ru.bvn13.imdbspider.spider.tasker.Task;
/**
 * Composes a {@link Tagline} from a task and its nested tasks.
 *
 * @author boyko_vn at 15.01.2019
 */
public class TaglineComposer extends AbstractImdbObjectComposer implements ImdbObjectComposer<Tagline> {

    /**
     * Creates the composer; both arguments are handed to the base class.
     *
     * @param apiFactory                factory used to fill objects from tasks
     * @param imdbObjectComposerFactory composer factory kept by the base class
     * @throws ComposerNotFoundException declared by the base-class constructor
     */
    public TaglineComposer(ApiFactory apiFactory, ImdbObjectComposerFactory imdbObjectComposerFactory) throws ComposerNotFoundException {
        super(apiFactory, imdbObjectComposerFactory);
    }

    /**
     * Builds a tagline by filling it from {@code task} first and then from every
     * nested task, in list order.
     *
     * @param task the tagline task
     * @return the populated tagline
     * @throws ImdbSpiderException declared by the composer contract
     */
    @Override
    public Tagline compose(Task task) throws ImdbSpiderException {
        final Tagline composed = new Tagline();
        apiFactory.fillUpImdbObject(composed, task);
        task.getNestedTasks().forEach(child -> apiFactory.fillUpImdbObject(composed, child));
        return composed;
    }
}

View File

@ -0,0 +1,42 @@
package ru.bvn13.imdbspider.spider.composer;
import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
import ru.bvn13.imdbspider.exceptions.composer.ComposerNotFoundException;
import ru.bvn13.imdbspider.imdb.Tagline;
import ru.bvn13.imdbspider.imdb.TaglineList;
import ru.bvn13.imdbspider.spider.api.ApiFactory;
import ru.bvn13.imdbspider.spider.tasker.Task;
/**
 * Composes a {@link TaglineList} from the first nested task of a given task.
 *
 * @author boyko_vn at 15.01.2019
 */
public class TaglineListComposer extends AbstractImdbObjectComposer implements ImdbObjectComposer<TaglineList> {

    /** Composer used for the individual taglines; resolved in the constructor. */
    private TaglineComposer taglineComposer;

    /**
     * Creates the composer and looks up the tagline composer from the factory.
     *
     * @param apiFactory                factory used to fill objects from tasks
     * @param imdbObjectComposerFactory factory used to obtain the tagline composer
     * @throws ComposerNotFoundException if the factory has no composer for {@link Tagline}
     */
    public TaglineListComposer(ApiFactory apiFactory, ImdbObjectComposerFactory imdbObjectComposerFactory) throws ComposerNotFoundException {
        super(apiFactory, imdbObjectComposerFactory);
        taglineComposer = (TaglineComposer) imdbObjectComposerFactory.getComposer(Tagline.class);
    }

    /**
     * Builds a tagline list. When {@code task} has no nested tasks an empty list object
     * is returned. Otherwise the first nested task fills the list object, and each of
     * that task's nested tasks is composed into one tagline. A tagline whose composition
     * fails has its stack trace printed and is left out.
     *
     * @param task the parent task whose first nested task describes the tagline list
     * @return the composed tagline list, never {@code null}
     */
    @Override
    public TaglineList compose(Task task) {
        final TaglineList composed = new TaglineList();
        if (task.getNestedTasks().isEmpty()) {
            return composed;
        }

        final Task listTask = task.getNestedTasks().get(0);
        apiFactory.fillUpImdbObject(composed, listTask);
        for (Task entryTask : listTask.getNestedTasks()) {
            try {
                composed.getTaglines().add(taglineComposer.compose(entryTask));
            } catch (ImdbSpiderException e) {
                e.printStackTrace();
            }
        }
        return composed;
    }
}

View File

@ -1,8 +1,5 @@
package ru.bvn13.imdbspider.runner;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.junit.BeforeClass;
import org.junit.Test;
import ru.bvn13.imdbspider.ImdbSpider;
@ -10,14 +7,23 @@ import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
import ru.bvn13.imdbspider.imdb.Movie;
import ru.bvn13.imdbspider.imdb.MovieDataType;
import ru.bvn13.imdbspider.imdb.MovieList;
import ru.bvn13.imdbspider.imdb.Tagline;
import ru.bvn13.imdbspider.imdb.accessories.SoundMix;
import static org.junit.Assert.*;
public class MovieSearchTest {
private static final String TERMINATOR_STORYLINE = "A cyborg is sent from the future on a deadly mission. He has to kill Sarah Connor, a young woman whose life will have a great significance in years to come. Sarah has only one protector - Kyle Reese - also sent from the future. The Terminator uses his exceptional intelligence and strength to find Sarah, but is there any way to stop the seemingly indestructible cyborg ?";
private static final String TERMINATOR_POSTER_LINK = "https://m.media-amazon.com/images/M/MV5BYTViNzMxZjEtZGEwNy00MDNiLWIzNGQtZDY2MjQ1OWViZjFmXkEyXkFqcGdeQXVyNzkwMjQ5NzM@._V1_UX182_CR0,0,182,268_AL_.jpg";
private static final String TERMINATOR_TAGLINE_1 = "I'll be back!";
private static final String TERMINATOR_TAGLINE_2 = "In the Year of Darkness, 2029, the rulers of this planet devised the ultimate plan. They would reshape the Future by changing the Past. The plan required something that felt no pity. No pain. No fear. Something unstoppable. They created 'THE TERMINATOR'";
private static final String TERMINATOR_TAGLINE_3 = "The thing that won't die, in the nightmare that won't end.";
private static final String TERMINATOR_TAGLINE_4 = "Your future is in its hands.";
private static final String TERMINATOR_TAGLINE_5 = "La sua missione e una sola: distruggere, uccidere... (His one and only mission: to destroy, to kill...) (Italian DVD)";
private static ImdbSpider spider;
@BeforeClass
@ -47,7 +53,8 @@ public class MovieSearchTest {
MovieDataType.SOUND_MIXES,
MovieDataType.COLOR,
MovieDataType.ASPECT_RATIO,
MovieDataType.POSTER
MovieDataType.POSTER,
MovieDataType.TAGLINES
);
@ -73,6 +80,7 @@ public class MovieSearchTest {
assertTrue(movie.isDataTypeRetrieved(MovieDataType.COLOR));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.ASPECT_RATIO));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.POSTER));
assertTrue(movie.isDataTypeRetrieved(MovieDataType.TAGLINES));
assertEquals("0088247", movie.getId());
assertEquals("The Terminator", movie.getOriginalTitle());
@ -104,6 +112,11 @@ public class MovieSearchTest {
assertEquals("$40,000,000", movie.getCumulativeWorldwideGross());
assertEquals("107 min", movie.getRuntime());
assertEquals("Color", movie.getColor());
assertEquals("1.85 : 1", movie.getAspectRatio());
assertEquals(TERMINATOR_POSTER_LINK, movie.getPosterLink());
//sound mixes
assertTrue(movie.getSoundMixes().size() > 0);
@ -123,10 +136,26 @@ public class MovieSearchTest {
assertEquals("(DVD Re-Release)", descrDolby);
assertEquals("(DTS HD Master Audio)", descrDTS);
assertEquals("Color", movie.getColor());
assertEquals("1.85 : 1", movie.getAspectRatio());
// taglines
assertNotNull(movie.getTaglineList());
assertEquals(5, movie.getTaglineList().getTaglines().size());
assertEquals(TERMINATOR_POSTER_LINK, movie.getPosterLink());
boolean hasTagline1 = false, hasTagline2 = false, hasTagline3 = false, hasTagline4 = false, hasTagline5 = false;
for (Tagline tagline : movie.getTaglineList().getTaglines()) {
switch (tagline.getText()) {
case TERMINATOR_TAGLINE_1 : hasTagline1 = true; break;
case TERMINATOR_TAGLINE_2 : hasTagline2 = true; break;
case TERMINATOR_TAGLINE_3 : hasTagline3 = true; break;
case TERMINATOR_TAGLINE_4 : hasTagline4 = true; break;
case TERMINATOR_TAGLINE_5 : hasTagline5 = true; break;
}
}
assertTrue(hasTagline1);
assertTrue(hasTagline2);
assertTrue(hasTagline3);
assertTrue(hasTagline4);
assertTrue(hasTagline5);
}
}