mirror of https://github.com/bvn13/imdb-spider.git
Implemented a configurable set of movie data types to retrieve; implemented retrieval of a movie's original title
parent
fadf6501e9
commit
96880149dc
|
@ -56,6 +56,8 @@ public class ImdbSpider {
|
|||
|
||||
public MovieList searchMovieByTitle(String title, int maxCount, EnumSet<MovieDataType> dataTypes) throws ImdbSpiderException {
|
||||
|
||||
apiFactory.setMovieDataTypeSet(dataTypes);
|
||||
|
||||
String url = URL_SEARCH_TITLE.replace("{{title}}", URLEncoder.encode(title, Charset.forName("utf-8")));
|
||||
|
||||
List<Task> tasks = new ArrayList<>();
|
||||
|
|
|
@ -9,6 +9,7 @@ public enum MovieDataType implements DataType {
|
|||
|
||||
ID("id"),
|
||||
TITLE("title"),
|
||||
ORIGINAL_TITLE("original_title"),
|
||||
YEAR("year"),
|
||||
AKAS("akas")
|
||||
|
||||
|
|
|
@ -3,8 +3,11 @@ package ru.bvn13.imdbspider.spider.api;
|
|||
import ru.bvn13.imdbspider.exceptions.api.DataTypeNotSupportedException;
|
||||
import ru.bvn13.imdbspider.imdb.DataType;
|
||||
import ru.bvn13.imdbspider.imdb.ImdbObject;
|
||||
import ru.bvn13.imdbspider.imdb.MovieDataType;
|
||||
import ru.bvn13.imdbspider.spider.tasker.Task;
|
||||
|
||||
import java.util.EnumSet;
|
||||
|
||||
/**
|
||||
* @author boyko_vn at 09.01.2019
|
||||
*/
|
||||
|
@ -14,4 +17,10 @@ public interface ApiFactory {
|
|||
|
||||
void fillUpImdbObject(ImdbObject imdbObject, Task task);
|
||||
|
||||
/**
 * Returns the default set of movie data types of this factory.
 *
 * @return the default movie data type set
 */
EnumSet<MovieDataType> getDefaultMovieDataTypeSet();
|
||||
|
||||
/**
 * Sets the movie data types this factory should work with.
 *
 * @param movieDataTypeSet the movie data types to use
 */
void setMovieDataTypeSet(EnumSet<MovieDataType> movieDataTypeSet);
|
||||
|
||||
/**
 * Returns the movie data type set currently assigned to this factory.
 *
 * @return the current movie data type set
 */
EnumSet<MovieDataType> getMovieDataTypeSet();
|
||||
|
||||
}
|
||||
|
|
|
@ -20,7 +20,8 @@ public class ApiFactory_1_0 implements ApiFactory {
|
|||
|
||||
private final Pattern PATTERN_MOVIE_ID_FROM_MOVIELIST = Pattern.compile("/title/tt(\\d+)/.*");
|
||||
|
||||
private EnumSet<MovieDataType> defaultMovieDataType = EnumSet.of(MovieDataType.ID, MovieDataType.TITLE, MovieDataType.YEAR);
|
||||
private EnumSet<MovieDataType> defaultMovieDataTypeSet = EnumSet.of(MovieDataType.ID, MovieDataType.TITLE, MovieDataType.YEAR);
|
||||
private EnumSet<MovieDataType> movieDataTypeSet;
|
||||
|
||||
@Override
|
||||
public Task taskByDataType(DataType dataType) throws DataTypeNotSupportedException {
|
||||
|
@ -66,6 +67,17 @@ public class ApiFactory_1_0 implements ApiFactory {
|
|||
task.setResult(task.getCssSelectorResult().first().wholeText().trim());
|
||||
});
|
||||
break;
|
||||
case ORIGINAL_TITLE:
|
||||
t.setCssSelector("#title-overview-widget > div.vital > div.title_block > div > div.titleBar > div.title_wrapper > div.originalTitle");
|
||||
t.setPostprocess((task, s) -> {
|
||||
task.setResultType(String.class);
|
||||
if (task.getCssSelectorResult().size() > 0) {
|
||||
task.setResult(task.getCssSelectorResult().first().ownText());
|
||||
} else {
|
||||
task.setResult("");
|
||||
}
|
||||
});
|
||||
break;
|
||||
case YEAR:
|
||||
t.setCssSelector("#titleYear > a");
|
||||
t.setPostprocess((task, s) -> {
|
||||
|
@ -102,14 +114,17 @@ public class ApiFactory_1_0 implements ApiFactory {
|
|||
}
|
||||
}
|
||||
Element link = element.select("a").first();
|
||||
if (!defaultMovieDataType.contains(MovieDataType.ID)) {
|
||||
defaultMovieDataType.add(MovieDataType.ID);
|
||||
if (movieDataTypeSet == null) {
|
||||
movieDataTypeSet = defaultMovieDataTypeSet;
|
||||
}
|
||||
if (!movieDataTypeSet.contains(MovieDataType.ID)) {
|
||||
movieDataTypeSet.add(MovieDataType.ID);
|
||||
}
|
||||
Task movieTask = this.taskByMovieDataType(MovieDataType.ID)
|
||||
.setParentTask(task)
|
||||
.setUrl(String.format("%s%s", URL_MAIN, link.attr("href")));
|
||||
task.getNestedTasks().add(movieTask);
|
||||
defaultMovieDataType.forEach(movieDataType -> movieTask.getNestedTasks().add(this.taskByMovieDataType(movieDataType)
|
||||
movieDataTypeSet.forEach(movieDataType -> movieTask.getNestedTasks().add(this.taskByMovieDataType(movieDataType)
|
||||
.setParentTask(movieTask)
|
||||
.setUrl(String.format("%s%s", URL_MAIN, link.attr("href")))));
|
||||
}
|
||||
|
@ -128,6 +143,9 @@ public class ApiFactory_1_0 implements ApiFactory {
|
|||
case TITLE:
|
||||
movie.setTitle((String) task.getResult());
|
||||
break;
|
||||
case ORIGINAL_TITLE:
|
||||
movie.setOriginalTitle((String) task.getResult());
|
||||
break;
|
||||
case YEAR:
|
||||
movie.setYear((Integer) task.getResult());
|
||||
break;
|
||||
|
@ -142,11 +160,18 @@ public class ApiFactory_1_0 implements ApiFactory {
|
|||
}
|
||||
}
|
||||
|
||||
public EnumSet<MovieDataType> getDefaultMovieDataType() {
|
||||
return defaultMovieDataType;
|
||||
/**
 * Returns the default movie data type set held by this factory.
 * The field itself is returned, not a copy, so changes made to the
 * returned set are visible to this factory.
 *
 * @return the {@code defaultMovieDataTypeSet} field
 */
@Override
|
||||
public EnumSet<MovieDataType> getDefaultMovieDataTypeSet() {
|
||||
return defaultMovieDataTypeSet;
|
||||
}
|
||||
|
||||
public void setDefaultMovieDataType(EnumSet<MovieDataType> defaultMovieDataType) {
|
||||
this.defaultMovieDataType = defaultMovieDataType;
|
||||
/**
 * Stores the given movie data type set in this factory.
 * The reference is kept as-is (no defensive copy), so any later
 * modification of the stored set is also visible to the caller.
 *
 * @param movieDataTypeSet the movie data types to use
 */
@Override
|
||||
public void setMovieDataTypeSet(EnumSet<MovieDataType> movieDataTypeSet) {
|
||||
this.movieDataTypeSet = movieDataTypeSet;
|
||||
}
|
||||
|
||||
/**
 * Returns the movie data type set currently stored in this factory.
 * The field itself is returned, not a copy.
 *
 * @return the {@code movieDataTypeSet} field; may be {@code null} if
 *         nothing has been assigned to it yet
 */
@Override
|
||||
public EnumSet<MovieDataType> getMovieDataTypeSet() {
|
||||
return movieDataTypeSet;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,8 +7,10 @@ import org.junit.Test;
|
|||
import ru.bvn13.imdbspider.ImdbSpider;
|
||||
import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
|
||||
import ru.bvn13.imdbspider.imdb.Movie;
|
||||
import ru.bvn13.imdbspider.imdb.MovieDataType;
|
||||
import ru.bvn13.imdbspider.imdb.MovieList;
|
||||
|
||||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
|
@ -24,7 +26,7 @@ public class AppTest
|
|||
@Test
|
||||
public void searchTerminatorTest() {
|
||||
try {
|
||||
MovieList result = spider.searchMovieByTitle("test", 5);
|
||||
MovieList result = spider.searchMovieByTitle("Терминатор", 5, EnumSet.of(MovieDataType.ID, MovieDataType.TITLE, MovieDataType.ORIGINAL_TITLE, MovieDataType.YEAR));
|
||||
} catch (ImdbSpiderException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue