From 96880149dc48f9a69cfcd88183a4574f35cb959d Mon Sep 17 00:00:00 2001 From: Vyacheslav Boyko Date: Fri, 11 Jan 2019 17:00:51 +0300 Subject: [PATCH] implemented setting various data type set to be retrieved, implemented retrieving original title for movie --- .../java/ru/bvn13/imdbspider/ImdbSpider.java | 2 + .../bvn13/imdbspider/imdb/MovieDataType.java | 1 + .../imdbspider/spider/api/ApiFactory.java | 9 ++++ .../spider/api/v1_0/ApiFactory_1_0.java | 41 +++++++++++++++---- .../ru/bvn13/imdbspider/runner/AppTest.java | 4 +- 5 files changed, 48 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/ru/bvn13/imdbspider/ImdbSpider.java b/core/src/main/java/ru/bvn13/imdbspider/ImdbSpider.java index 5f045ba..1555005 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/ImdbSpider.java +++ b/core/src/main/java/ru/bvn13/imdbspider/ImdbSpider.java @@ -56,6 +56,8 @@ public class ImdbSpider { public MovieList searchMovieByTitle(String title, int maxCount, EnumSet dataTypes) throws ImdbSpiderException { + apiFactory.setMovieDataTypeSet(dataTypes); + String url = URL_SEARCH_TITLE.replace("{{title}}", URLEncoder.encode(title, Charset.forName("utf-8"))); List tasks = new ArrayList<>(); diff --git a/core/src/main/java/ru/bvn13/imdbspider/imdb/MovieDataType.java b/core/src/main/java/ru/bvn13/imdbspider/imdb/MovieDataType.java index 61979a1..d055dd4 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/imdb/MovieDataType.java +++ b/core/src/main/java/ru/bvn13/imdbspider/imdb/MovieDataType.java @@ -9,6 +9,7 @@ public enum MovieDataType implements DataType { ID("id"), TITLE("title"), + ORIGINAL_TITLE("original_title"), YEAR("year"), AKAS("akas") diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/ApiFactory.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/ApiFactory.java index 686734d..9d2f9f5 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/ApiFactory.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/ApiFactory.java @@ -3,8 +3,11 @@ package ru.bvn13.imdbspider.spider.api; import ru.bvn13.imdbspider.exceptions.api.DataTypeNotSupportedException; import ru.bvn13.imdbspider.imdb.DataType; import ru.bvn13.imdbspider.imdb.ImdbObject; +import ru.bvn13.imdbspider.imdb.MovieDataType; import ru.bvn13.imdbspider.spider.tasker.Task; +import java.util.EnumSet; + /** * @author boyko_vn at 09.01.2019 */ @@ -14,4 +17,10 @@ public interface ApiFactory { void fillUpImdbObject(ImdbObject imdbObject, Task task); + EnumSet getDefaultMovieDataTypeSet(); + + void setMovieDataTypeSet(EnumSet movieDataTypeSet); + + EnumSet getMovieDataTypeSet(); + } diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java index f8839b5..bd5c35a 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java @@ -20,7 +20,8 @@ public class ApiFactory_1_0 implements ApiFactory { private final Pattern PATTERN_MOVIE_ID_FROM_MOVIELIST = Pattern.compile("/title/tt(\\d+)/.*"); - private EnumSet defaultMovieDataType = EnumSet.of(MovieDataType.ID, MovieDataType.TITLE, MovieDataType.YEAR); + private EnumSet defaultMovieDataTypeSet = EnumSet.of(MovieDataType.ID, MovieDataType.TITLE, MovieDataType.YEAR); + private EnumSet movieDataTypeSet; @Override public Task taskByDataType(DataType dataType) throws DataTypeNotSupportedException { @@ -66,6 +67,17 @@ public class ApiFactory_1_0 implements ApiFactory { task.setResult(task.getCssSelectorResult().first().wholeText().trim()); }); break; + case ORIGINAL_TITLE: + t.setCssSelector("#title-overview-widget > div.vital > div.title_block > div > div.titleBar > div.title_wrapper > div.originalTitle"); + t.setPostprocess((task, s) -> { + task.setResultType(String.class); + if (task.getCssSelectorResult().size() > 0) { + task.setResult(task.getCssSelectorResult().first().ownText()); + } else { + task.setResult(""); + } + }); + break; case YEAR: t.setCssSelector("#titleYear > a"); t.setPostprocess((task, s) -> { @@ -102,14 +114,17 @@ public class ApiFactory_1_0 implements ApiFactory { } } Element link = element.select("a").first(); - if (!defaultMovieDataType.contains(MovieDataType.ID)) { - defaultMovieDataType.add(MovieDataType.ID); + if (movieDataTypeSet == null) { + movieDataTypeSet = defaultMovieDataTypeSet; + } + if (!movieDataTypeSet.contains(MovieDataType.ID)) { + movieDataTypeSet.add(MovieDataType.ID); } Task movieTask = this.taskByMovieDataType(MovieDataType.ID) .setParentTask(task) .setUrl(String.format("%s%s", URL_MAIN, link.attr("href"))); task.getNestedTasks().add(movieTask); - defaultMovieDataType.forEach(movieDataType -> movieTask.getNestedTasks().add(this.taskByMovieDataType(movieDataType) + movieDataTypeSet.forEach(movieDataType -> movieTask.getNestedTasks().add(this.taskByMovieDataType(movieDataType) .setParentTask(movieTask) .setUrl(String.format("%s%s", URL_MAIN, link.attr("href"))))); } @@ -128,6 +143,9 @@ public class ApiFactory_1_0 implements ApiFactory { case TITLE: movie.setTitle((String) task.getResult()); break; + case ORIGINAL_TITLE: + movie.setOriginalTitle((String) task.getResult()); + break; case YEAR: movie.setYear((Integer) task.getResult()); break; @@ -142,11 +160,18 @@ public class ApiFactory_1_0 implements ApiFactory { } } - public EnumSet getDefaultMovieDataType() { - return defaultMovieDataType; + @Override + public EnumSet getDefaultMovieDataTypeSet() { + return defaultMovieDataTypeSet; } - public void setDefaultMovieDataType(EnumSet defaultMovieDataType) { - this.defaultMovieDataType = defaultMovieDataType; + @Override + public void setMovieDataTypeSet(EnumSet movieDataTypeSet) { + this.movieDataTypeSet = movieDataTypeSet; + } + + @Override + public EnumSet getMovieDataTypeSet() { + return movieDataTypeSet; } } diff --git a/core/src/test/java/ru/bvn13/imdbspider/runner/AppTest.java b/core/src/test/java/ru/bvn13/imdbspider/runner/AppTest.java index 9908f6a..9e24704 100644 --- a/core/src/test/java/ru/bvn13/imdbspider/runner/AppTest.java +++ b/core/src/test/java/ru/bvn13/imdbspider/runner/AppTest.java @@ -7,8 +7,10 @@ import org.junit.Test; import ru.bvn13.imdbspider.ImdbSpider; import ru.bvn13.imdbspider.exceptions.ImdbSpiderException; import ru.bvn13.imdbspider.imdb.Movie; +import ru.bvn13.imdbspider.imdb.MovieDataType; import ru.bvn13.imdbspider.imdb.MovieList; +import java.util.EnumSet; import java.util.List; @@ -24,7 +26,7 @@ public class AppTest @Test public void searchTerminatorTest() { try { - MovieList result = spider.searchMovieByTitle("test", 5); + MovieList result = spider.searchMovieByTitle("Терминатор", 5, EnumSet.of(MovieDataType.ID, MovieDataType.TITLE, MovieDataType.ORIGINAL_TITLE, MovieDataType.YEAR)); } catch (ImdbSpiderException e) { e.printStackTrace(); }