From 5014294dae6cf33ac8a1e21a6445ca6d078df4ba Mon Sep 17 00:00:00 2001 From: Vyacheslav Boyko Date: Wed, 16 Jan 2019 18:08:37 +0300 Subject: [PATCH] implemented initializing original title from simple title if the first one is not present --- .../bvn13/imdbspider/spider/api/ApiFactory.java | 1 + .../spider/api/v1_0/ApiFactory_1_0.java | 1 + .../spider/api/v1_0/MovieProcessor_1_0.java | 17 ++++++++++++++++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/ApiFactory.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/ApiFactory.java index ae10650..7c2dbf5 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/ApiFactory.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/ApiFactory.java @@ -5,6 +5,7 @@ import ru.bvn13.imdbspider.exceptions.api.DataTypeNotSupportedException; import ru.bvn13.imdbspider.imdb.DataType; import ru.bvn13.imdbspider.imdb.ImdbObject; import ru.bvn13.imdbspider.imdb.MovieDataType; +import ru.bvn13.imdbspider.spider.processor.HtmlProcessor; import ru.bvn13.imdbspider.spider.tasker.Task; import java.util.Arrays; diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java index b759f15..4f60c1c 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/ApiFactory_1_0.java @@ -214,6 +214,7 @@ public class ApiFactory_1_0 implements ApiFactory { } + HtmlProcessor getHtmlProcessor() { return htmlProcessor; } diff --git a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieProcessor_1_0.java b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieProcessor_1_0.java index 40e5c5d..fbea68f 100644 --- a/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieProcessor_1_0.java +++ b/core/src/main/java/ru/bvn13/imdbspider/spider/api/v1_0/MovieProcessor_1_0.java 
@@ -42,7 +42,22 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 {
                     break;
                 case ORIGINAL_TITLE:
                     t.setCssSelector("#title-overview-widget > div.vital > div.title_block > div > div.titleBar > div.title_wrapper > div.originalTitle");
-                    t.setPostprocess(ApiFactory_1_0.POSTPROCESS.GET_OWN_TEXT_OF_FIRST_ELEMENT);
+                    t.setPostprocess((task, s) -> { // original title; falls back to the page's main <h1> title when the originalTitle block is absent
+                        task.setResultType(String.class);
+                        task.setResult(""); // default, kept when neither selector matches or the fallback lookup fails
+                        if (task.getCssSelectorResult().size() > 0) {
+                            task.setResult(task.getCssSelectorResult().first().ownText()); // own text only (not descendants), in line with the replaced GET_OWN_TEXT_OF_FIRST_ELEMENT and the fallback below
+                        } else {
+                            try {
+                                Elements titles = getApiFactory().getHtmlProcessor().process(s, "#title-overview-widget > div.vital > div.title_block > div > div.titleBar > div.title_wrapper > h1"); // like title
+                                if (titles.size() > 0) {
+                                    task.setResult(titles.first().ownText());
+                                }
+                            } catch (HtmlProcessorException e) {
+                                e.printStackTrace(); // best effort: the failure is only reported and the result stays ""
+                            }
+                        }
+                    });
                     break;
                 case YEAR:
                     t.setCssSelector("#titleYear > a");