mirror of https://github.com/bvn13/imdb-spider.git
Implemented initializing the original title from the simple title when the original title is not present
parent
e1eb8e6c5c
commit
5014294dae
|
@ -5,6 +5,7 @@ import ru.bvn13.imdbspider.exceptions.api.DataTypeNotSupportedException;
|
|||
import ru.bvn13.imdbspider.imdb.DataType;
|
||||
import ru.bvn13.imdbspider.imdb.ImdbObject;
|
||||
import ru.bvn13.imdbspider.imdb.MovieDataType;
|
||||
import ru.bvn13.imdbspider.spider.processor.HtmlProcessor;
|
||||
import ru.bvn13.imdbspider.spider.tasker.Task;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
|
@ -214,6 +214,7 @@ public class ApiFactory_1_0 implements ApiFactory {
|
|||
}
|
||||
|
||||
|
||||
|
||||
/**
 * Returns the HTML processor held in the {@code htmlProcessor} field.
 *
 * <p>Declared without an access modifier.
 *
 * @return the current value of {@code htmlProcessor}
 */
HtmlProcessor getHtmlProcessor() {
|
||||
return htmlProcessor;
|
||||
}
|
||||
|
|
|
@ -42,7 +42,22 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 {
|
|||
break;
|
||||
case ORIGINAL_TITLE:
|
||||
t.setCssSelector("#title-overview-widget > div.vital > div.title_block > div > div.titleBar > div.title_wrapper > div.originalTitle");
|
||||
t.setPostprocess(ApiFactory_1_0.POSTPROCESS.GET_OWN_TEXT_OF_FIRST_ELEMENT);
|
||||
t.setPostprocess((task, s) -> {
|
||||
task.setResultType(String.class);
|
||||
task.setResult("");
|
||||
if (task.getCssSelectorResult().size() > 0) {
|
||||
task.setResult(task.getCssSelectorResult().first().text());
|
||||
} else {
|
||||
try {
|
||||
Elements titles = getApiFactory().getHtmlProcessor().process(s, "#title-overview-widget > div.vital > div.title_block > div > div.titleBar > div.title_wrapper > h1"); // like title
|
||||
if (titles.size() > 0) {
|
||||
task.setResult(titles.first().ownText());
|
||||
}
|
||||
} catch (HtmlProcessorException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
});
|
||||
break;
|
||||
case YEAR:
|
||||
t.setCssSelector("#titleYear > a");
|
||||
|
|
Loading…
Reference in New Issue