Implemented configuring the set of data types to be retrieved; implemented retrieving the original title for a movie

master
Vyacheslav Boyko 2019-01-11 17:00:51 +03:00
parent fadf6501e9
commit 96880149dc
5 changed files with 48 additions and 9 deletions

View File

@ -56,6 +56,8 @@ public class ImdbSpider {
public MovieList searchMovieByTitle(String title, int maxCount, EnumSet<MovieDataType> dataTypes) throws ImdbSpiderException {
apiFactory.setMovieDataTypeSet(dataTypes);
String url = URL_SEARCH_TITLE.replace("{{title}}", URLEncoder.encode(title, Charset.forName("utf-8")));
List<Task> tasks = new ArrayList<>();

View File

@ -9,6 +9,7 @@ public enum MovieDataType implements DataType {
ID("id"),
TITLE("title"),
ORIGINAL_TITLE("original_title"),
YEAR("year"),
AKAS("akas")

View File

@ -3,8 +3,11 @@ package ru.bvn13.imdbspider.spider.api;
import ru.bvn13.imdbspider.exceptions.api.DataTypeNotSupportedException;
import ru.bvn13.imdbspider.imdb.DataType;
import ru.bvn13.imdbspider.imdb.ImdbObject;
import ru.bvn13.imdbspider.imdb.MovieDataType;
import ru.bvn13.imdbspider.spider.tasker.Task;
import java.util.EnumSet;
/**
* @author boyko_vn at 09.01.2019
*/
@ -14,4 +17,10 @@ public interface ApiFactory {
/**
 * Fills the given IMDB object using the given task.
 * NOTE(review): presumably copies the task's result into the matching field of the object —
 * confirm against the implementations.
 *
 * @param imdbObject the object to fill
 * @param task the task to take data from
 */
void fillUpImdbObject(ImdbObject imdbObject, Task task);
/**
 * Returns the default set of movie data types.
 * NOTE(review): presumably the set used when none has been set via
 * {@link #setMovieDataTypeSet(EnumSet)} — confirm against the implementations.
 *
 * @return the default movie data type set
 */
EnumSet<MovieDataType> getDefaultMovieDataTypeSet();
/**
 * Sets the movie data types to be retrieved.
 *
 * @param movieDataTypeSet the movie data types to use
 */
void setMovieDataTypeSet(EnumSet<MovieDataType> movieDataTypeSet);
/**
 * Returns the movie data type set previously passed to {@link #setMovieDataTypeSet(EnumSet)}.
 * NOTE(review): may be {@code null} if nothing was set — confirm against the implementations.
 *
 * @return the currently configured movie data type set
 */
EnumSet<MovieDataType> getMovieDataTypeSet();
}

View File

@ -20,7 +20,8 @@ public class ApiFactory_1_0 implements ApiFactory {
private final Pattern PATTERN_MOVIE_ID_FROM_MOVIELIST = Pattern.compile("/title/tt(\\d+)/.*");
private EnumSet<MovieDataType> defaultMovieDataType = EnumSet.of(MovieDataType.ID, MovieDataType.TITLE, MovieDataType.YEAR);
private EnumSet<MovieDataType> defaultMovieDataTypeSet = EnumSet.of(MovieDataType.ID, MovieDataType.TITLE, MovieDataType.YEAR);
private EnumSet<MovieDataType> movieDataTypeSet;
@Override
public Task taskByDataType(DataType dataType) throws DataTypeNotSupportedException {
@ -66,6 +67,17 @@ public class ApiFactory_1_0 implements ApiFactory {
task.setResult(task.getCssSelectorResult().first().wholeText().trim());
});
break;
case ORIGINAL_TITLE:
t.setCssSelector("#title-overview-widget > div.vital > div.title_block > div > div.titleBar > div.title_wrapper > div.originalTitle");
t.setPostprocess((task, s) -> {
task.setResultType(String.class);
if (task.getCssSelectorResult().size() > 0) {
task.setResult(task.getCssSelectorResult().first().ownText());
} else {
task.setResult("");
}
});
break;
case YEAR:
t.setCssSelector("#titleYear > a");
t.setPostprocess((task, s) -> {
@ -102,14 +114,17 @@ public class ApiFactory_1_0 implements ApiFactory {
}
}
Element link = element.select("a").first();
if (!defaultMovieDataType.contains(MovieDataType.ID)) {
defaultMovieDataType.add(MovieDataType.ID);
if (movieDataTypeSet == null) {
movieDataTypeSet = defaultMovieDataTypeSet;
}
if (!movieDataTypeSet.contains(MovieDataType.ID)) {
movieDataTypeSet.add(MovieDataType.ID);
}
Task movieTask = this.taskByMovieDataType(MovieDataType.ID)
.setParentTask(task)
.setUrl(String.format("%s%s", URL_MAIN, link.attr("href")));
task.getNestedTasks().add(movieTask);
defaultMovieDataType.forEach(movieDataType -> movieTask.getNestedTasks().add(this.taskByMovieDataType(movieDataType)
movieDataTypeSet.forEach(movieDataType -> movieTask.getNestedTasks().add(this.taskByMovieDataType(movieDataType)
.setParentTask(movieTask)
.setUrl(String.format("%s%s", URL_MAIN, link.attr("href")))));
}
@ -128,6 +143,9 @@ public class ApiFactory_1_0 implements ApiFactory {
case TITLE:
movie.setTitle((String) task.getResult());
break;
case ORIGINAL_TITLE:
movie.setOriginalTitle((String) task.getResult());
break;
case YEAR:
movie.setYear((Integer) task.getResult());
break;
@ -142,11 +160,18 @@ public class ApiFactory_1_0 implements ApiFactory {
}
}
public EnumSet<MovieDataType> getDefaultMovieDataType() {
return defaultMovieDataType;
/**
 * Returns this factory's default movie data type set.
 *
 * <p>The internal {@code defaultMovieDataTypeSet} instance is returned directly, not a copy,
 * so changes made to the returned set affect this factory.
 *
 * @return the default movie data type set
 */
@Override
public EnumSet<MovieDataType> getDefaultMovieDataTypeSet() {
return defaultMovieDataTypeSet;
}
public void setDefaultMovieDataType(EnumSet<MovieDataType> defaultMovieDataType) {
this.defaultMovieDataType = defaultMovieDataType;
/**
 * Sets the movie data types to be retrieved for each movie.
 *
 * <p>The given set is copied defensively: this factory adds {@code MovieDataType.ID} to its
 * working set while building movie tasks, and that modification must not leak into the
 * caller's set.
 *
 * @param movieDataTypeSet the data types to retrieve; {@code null} clears the selection, in
 *     which case the default set is used when movie tasks are built
 */
@Override
public void setMovieDataTypeSet(EnumSet<MovieDataType> movieDataTypeSet) {
    // Keep null as-is: the task-building code treats null as "fall back to the default set".
    this.movieDataTypeSet = (movieDataTypeSet == null) ? null : EnumSet.copyOf(movieDataTypeSet);
}
/**
 * Returns the currently configured movie data type set.
 *
 * <p>The internal {@code movieDataTypeSet} field is returned directly, not a copy. The field
 * has no initializer, so this may return {@code null} until a set has been assigned.
 *
 * @return the configured movie data type set, possibly {@code null}
 */
@Override
public EnumSet<MovieDataType> getMovieDataTypeSet() {
return movieDataTypeSet;
}
}

View File

@ -7,8 +7,10 @@ import org.junit.Test;
import ru.bvn13.imdbspider.ImdbSpider;
import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
import ru.bvn13.imdbspider.imdb.Movie;
import ru.bvn13.imdbspider.imdb.MovieDataType;
import ru.bvn13.imdbspider.imdb.MovieList;
import java.util.EnumSet;
import java.util.List;
@ -24,7 +26,7 @@ public class AppTest
@Test
public void searchTerminatorTest() {
try {
MovieList result = spider.searchMovieByTitle("test", 5);
MovieList result = spider.searchMovieByTitle("Терминатор", 5, EnumSet.of(MovieDataType.ID, MovieDataType.TITLE, MovieDataType.ORIGINAL_TITLE, MovieDataType.YEAR));
} catch (ImdbSpiderException e) {
e.printStackTrace();
}