Implemented configuring the set of data types to be retrieved; implemented retrieving the original title of a movie

master
Vyacheslav Boyko 2019-01-11 17:00:51 +03:00
parent fadf6501e9
commit 96880149dc
5 changed files with 48 additions and 9 deletions

View File

@ -56,6 +56,8 @@ public class ImdbSpider {
public MovieList searchMovieByTitle(String title, int maxCount, EnumSet<MovieDataType> dataTypes) throws ImdbSpiderException { public MovieList searchMovieByTitle(String title, int maxCount, EnumSet<MovieDataType> dataTypes) throws ImdbSpiderException {
apiFactory.setMovieDataTypeSet(dataTypes);
String url = URL_SEARCH_TITLE.replace("{{title}}", URLEncoder.encode(title, Charset.forName("utf-8"))); String url = URL_SEARCH_TITLE.replace("{{title}}", URLEncoder.encode(title, Charset.forName("utf-8")));
List<Task> tasks = new ArrayList<>(); List<Task> tasks = new ArrayList<>();

View File

@ -9,6 +9,7 @@ public enum MovieDataType implements DataType {
ID("id"), ID("id"),
TITLE("title"), TITLE("title"),
ORIGINAL_TITLE("original_title"),
YEAR("year"), YEAR("year"),
AKAS("akas") AKAS("akas")

View File

@ -3,8 +3,11 @@ package ru.bvn13.imdbspider.spider.api;
import ru.bvn13.imdbspider.exceptions.api.DataTypeNotSupportedException; import ru.bvn13.imdbspider.exceptions.api.DataTypeNotSupportedException;
import ru.bvn13.imdbspider.imdb.DataType; import ru.bvn13.imdbspider.imdb.DataType;
import ru.bvn13.imdbspider.imdb.ImdbObject; import ru.bvn13.imdbspider.imdb.ImdbObject;
import ru.bvn13.imdbspider.imdb.MovieDataType;
import ru.bvn13.imdbspider.spider.tasker.Task; import ru.bvn13.imdbspider.spider.tasker.Task;
import java.util.EnumSet;
/** /**
* @author boyko_vn at 09.01.2019 * @author boyko_vn at 09.01.2019
*/ */
@ -14,4 +17,10 @@ public interface ApiFactory {
void fillUpImdbObject(ImdbObject imdbObject, Task task); void fillUpImdbObject(ImdbObject imdbObject, Task task);
EnumSet<MovieDataType> getDefaultMovieDataTypeSet();
void setMovieDataTypeSet(EnumSet<MovieDataType> movieDataTypeSet);
EnumSet<MovieDataType> getMovieDataTypeSet();
} }

View File

@ -20,7 +20,8 @@ public class ApiFactory_1_0 implements ApiFactory {
private final Pattern PATTERN_MOVIE_ID_FROM_MOVIELIST = Pattern.compile("/title/tt(\\d+)/.*"); private final Pattern PATTERN_MOVIE_ID_FROM_MOVIELIST = Pattern.compile("/title/tt(\\d+)/.*");
private EnumSet<MovieDataType> defaultMovieDataType = EnumSet.of(MovieDataType.ID, MovieDataType.TITLE, MovieDataType.YEAR); private EnumSet<MovieDataType> defaultMovieDataTypeSet = EnumSet.of(MovieDataType.ID, MovieDataType.TITLE, MovieDataType.YEAR);
private EnumSet<MovieDataType> movieDataTypeSet;
@Override @Override
public Task taskByDataType(DataType dataType) throws DataTypeNotSupportedException { public Task taskByDataType(DataType dataType) throws DataTypeNotSupportedException {
@ -66,6 +67,17 @@ public class ApiFactory_1_0 implements ApiFactory {
task.setResult(task.getCssSelectorResult().first().wholeText().trim()); task.setResult(task.getCssSelectorResult().first().wholeText().trim());
}); });
break; break;
case ORIGINAL_TITLE:
t.setCssSelector("#title-overview-widget > div.vital > div.title_block > div > div.titleBar > div.title_wrapper > div.originalTitle");
t.setPostprocess((task, s) -> {
task.setResultType(String.class);
if (task.getCssSelectorResult().size() > 0) {
task.setResult(task.getCssSelectorResult().first().ownText());
} else {
task.setResult("");
}
});
break;
case YEAR: case YEAR:
t.setCssSelector("#titleYear > a"); t.setCssSelector("#titleYear > a");
t.setPostprocess((task, s) -> { t.setPostprocess((task, s) -> {
@ -102,14 +114,17 @@ public class ApiFactory_1_0 implements ApiFactory {
} }
} }
Element link = element.select("a").first(); Element link = element.select("a").first();
if (!defaultMovieDataType.contains(MovieDataType.ID)) { if (movieDataTypeSet == null) {
defaultMovieDataType.add(MovieDataType.ID); movieDataTypeSet = defaultMovieDataTypeSet;
}
if (!movieDataTypeSet.contains(MovieDataType.ID)) {
movieDataTypeSet.add(MovieDataType.ID);
} }
Task movieTask = this.taskByMovieDataType(MovieDataType.ID) Task movieTask = this.taskByMovieDataType(MovieDataType.ID)
.setParentTask(task) .setParentTask(task)
.setUrl(String.format("%s%s", URL_MAIN, link.attr("href"))); .setUrl(String.format("%s%s", URL_MAIN, link.attr("href")));
task.getNestedTasks().add(movieTask); task.getNestedTasks().add(movieTask);
defaultMovieDataType.forEach(movieDataType -> movieTask.getNestedTasks().add(this.taskByMovieDataType(movieDataType) movieDataTypeSet.forEach(movieDataType -> movieTask.getNestedTasks().add(this.taskByMovieDataType(movieDataType)
.setParentTask(movieTask) .setParentTask(movieTask)
.setUrl(String.format("%s%s", URL_MAIN, link.attr("href"))))); .setUrl(String.format("%s%s", URL_MAIN, link.attr("href")))));
} }
@ -128,6 +143,9 @@ public class ApiFactory_1_0 implements ApiFactory {
case TITLE: case TITLE:
movie.setTitle((String) task.getResult()); movie.setTitle((String) task.getResult());
break; break;
case ORIGINAL_TITLE:
movie.setOriginalTitle((String) task.getResult());
break;
case YEAR: case YEAR:
movie.setYear((Integer) task.getResult()); movie.setYear((Integer) task.getResult());
break; break;
@ -142,11 +160,18 @@ public class ApiFactory_1_0 implements ApiFactory {
} }
} }
public EnumSet<MovieDataType> getDefaultMovieDataType() { @Override
return defaultMovieDataType; public EnumSet<MovieDataType> getDefaultMovieDataTypeSet() {
return defaultMovieDataTypeSet;
} }
public void setDefaultMovieDataType(EnumSet<MovieDataType> defaultMovieDataType) { @Override
this.defaultMovieDataType = defaultMovieDataType; public void setMovieDataTypeSet(EnumSet<MovieDataType> movieDataTypeSet) {
this.movieDataTypeSet = movieDataTypeSet;
}
@Override
public EnumSet<MovieDataType> getMovieDataTypeSet() {
return movieDataTypeSet;
} }
} }

View File

@ -7,8 +7,10 @@ import org.junit.Test;
import ru.bvn13.imdbspider.ImdbSpider; import ru.bvn13.imdbspider.ImdbSpider;
import ru.bvn13.imdbspider.exceptions.ImdbSpiderException; import ru.bvn13.imdbspider.exceptions.ImdbSpiderException;
import ru.bvn13.imdbspider.imdb.Movie; import ru.bvn13.imdbspider.imdb.Movie;
import ru.bvn13.imdbspider.imdb.MovieDataType;
import ru.bvn13.imdbspider.imdb.MovieList; import ru.bvn13.imdbspider.imdb.MovieList;
import java.util.EnumSet;
import java.util.List; import java.util.List;
@ -24,7 +26,7 @@ public class AppTest
@Test @Test
public void searchTerminatorTest() { public void searchTerminatorTest() {
try { try {
MovieList result = spider.searchMovieByTitle("test", 5); MovieList result = spider.searchMovieByTitle("Терминатор", 5, EnumSet.of(MovieDataType.ID, MovieDataType.TITLE, MovieDataType.ORIGINAL_TITLE, MovieDataType.YEAR));
} catch (ImdbSpiderException e) { } catch (ImdbSpiderException e) {
e.printStackTrace(); e.printStackTrace();
} }