mirror of https://github.com/bvn13/imdb-spider.git
start working on retrieving akas, refactoring
parent
d62a55df85
commit
c133ad77bb
|
@ -0,0 +1,24 @@
|
|||
package ru.bvn13.imdbspider.imdb;
|
||||
|
||||
import java.util.EnumSet;
|
||||
|
||||
/**
|
||||
* Created by bvn13 on 16.01.2019.
|
||||
*/
|
||||
public class Aka extends ImdbObject<AkaDataType> {
|
||||
|
||||
private String title;
|
||||
|
||||
@Override
|
||||
protected void initRetrievedDataTypes() {
|
||||
this.retrievedDataTypes = EnumSet.noneOf(AkaDataType.class);
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
package ru.bvn13.imdbspider.imdb;
|
||||
|
||||
/**
|
||||
* Created by bvn13 on 16.01.2019.
|
||||
*/
|
||||
public enum AkaDataType implements DataType {
|
||||
|
||||
ID("id"),
|
||||
TITLE("title")
|
||||
;
|
||||
|
||||
private String value;
|
||||
|
||||
AkaDataType(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String get() {
|
||||
return value;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
package ru.bvn13.imdbspider.imdb;
|
||||
|
||||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by bvn13 on 16.01.2019.
|
||||
*/
|
||||
public class AkaList extends ImdbObject<AkaListDataType> {
|
||||
|
||||
private List<Aka> akas;
|
||||
|
||||
@Override
|
||||
protected void initRetrievedDataTypes() {
|
||||
this.retrievedDataTypes = EnumSet.noneOf(AkaListDataType.class);
|
||||
}
|
||||
|
||||
public List<Aka> getAkas() {
|
||||
return akas;
|
||||
}
|
||||
|
||||
public void setAkas(List<Aka> akas) {
|
||||
this.akas = akas;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
package ru.bvn13.imdbspider.imdb;
|
||||
|
||||
/**
|
||||
* Created by bvn13 on 16.01.2019.
|
||||
*/
|
||||
public enum AkaListDataType implements DataType {
|
||||
|
||||
ELEMENTS("elements")
|
||||
;
|
||||
|
||||
private String value;
|
||||
|
||||
AkaListDataType(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String get() {
|
||||
return value;
|
||||
}
|
||||
}
|
|
@ -1,9 +1,13 @@
|
|||
package ru.bvn13.imdbspider.spider.api.v1_0;
|
||||
|
||||
import ru.bvn13.imdbspider.imdb.DataType;
|
||||
import ru.bvn13.imdbspider.imdb.ImdbObject;
|
||||
import ru.bvn13.imdbspider.spider.tasker.Task;
|
||||
|
||||
/**
|
||||
* @author boyko_vn at 15.01.2019
|
||||
*/
|
||||
abstract public class AbstractApiProcessor_1_0 {
|
||||
abstract public class AbstractApiProcessor_1_0<C extends ImdbObject, D extends Enum<?> & DataType> {
|
||||
|
||||
protected boolean isDebug;
|
||||
|
||||
|
@ -20,4 +24,8 @@ abstract public class AbstractApiProcessor_1_0 {
|
|||
public void setDebug(boolean debug) {
|
||||
isDebug = debug;
|
||||
}
|
||||
|
||||
/**
 * Transfers the data carried by {@code task} into {@code imdbObject}.
 * Implementations visible in this package switch on the task's data type to decide what to set.
 *
 * @param imdbObject the object to fill
 * @param task       the task whose data is copied into the object
 */
abstract void fillUpImdbObject(C imdbObject, Task task);
|
||||
|
||||
/**
 * Produces the {@link Task} that corresponds to the given data type.
 * NOTE(review): some implementations are still stubs that return {@code null} - callers that
 * chain on the result should confirm the data type is actually implemented.
 *
 * @param dataType the data type a task is requested for
 * @return the task for {@code dataType}
 */
abstract Task taskByDataType(D dataType);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
package ru.bvn13.imdbspider.spider.api.v1_0;
|
||||
|
||||
import ru.bvn13.imdbspider.imdb.AkaList;
|
||||
import ru.bvn13.imdbspider.imdb.AkaListDataType;
|
||||
import ru.bvn13.imdbspider.spider.tasker.Task;
|
||||
|
||||
/**
|
||||
* Created by bvn13 on 16.01.2019.
|
||||
*/
|
||||
public class AkaListProcessor_1_0 extends AbstractApiProcessor_1_0<AkaList, AkaListDataType> {
|
||||
|
||||
public AkaListProcessor_1_0(ApiFactory_1_0 apiFactory) {
|
||||
super(apiFactory);
|
||||
}
|
||||
|
||||
@Override
|
||||
Task taskByDataType(AkaListDataType dataType) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fillUpImdbObject(AkaList imdbObject, Task task) {
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
package ru.bvn13.imdbspider.spider.api.v1_0;
|
||||
|
||||
import ru.bvn13.imdbspider.imdb.Aka;
|
||||
import ru.bvn13.imdbspider.imdb.AkaDataType;
|
||||
import ru.bvn13.imdbspider.spider.tasker.Task;
|
||||
|
||||
/**
|
||||
* Created by bvn13 on 16.01.2019.
|
||||
*/
|
||||
public class AkaProcessor_1_0 extends AbstractApiProcessor_1_0<Aka, AkaDataType> {
|
||||
public AkaProcessor_1_0(ApiFactory_1_0 apiFactory) {
|
||||
super(apiFactory);
|
||||
}
|
||||
|
||||
@Override
|
||||
Task taskByDataType(AkaDataType dataType) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
void fillUpImdbObject(Aka imdbObject, Task task) {
|
||||
|
||||
}
|
||||
}
|
|
@ -120,6 +120,8 @@ public class ApiFactory_1_0 implements ApiFactory {
|
|||
private MovieProcessor_1_0 movieProcessor;
|
||||
private TaglineListProcessor_1_0 taglineListProcessor;
|
||||
private TaglineProcessor_1_0 taglineProcessor;
|
||||
private AkaListProcessor_1_0 akaListProcessor;
|
||||
private AkaProcessor_1_0 akaProcessor;
|
||||
|
||||
public ApiFactory_1_0(HtmlProcessor htmlProcessor) {
|
||||
this.htmlProcessor = htmlProcessor;
|
||||
|
@ -128,6 +130,8 @@ public class ApiFactory_1_0 implements ApiFactory {
|
|||
this.movieProcessor = new MovieProcessor_1_0(this);
|
||||
this.taglineListProcessor = new TaglineListProcessor_1_0(this);
|
||||
this.taglineProcessor = new TaglineProcessor_1_0(this);
|
||||
this.akaListProcessor = new AkaListProcessor_1_0(this);
|
||||
this.akaProcessor = new AkaProcessor_1_0(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -163,13 +167,17 @@ public class ApiFactory_1_0 implements ApiFactory {
|
|||
@Override
|
||||
public Task taskByDataType(DataType dataType) throws DataTypeNotSupportedException {
|
||||
if (dataType instanceof MovieDataType) {
|
||||
return movieProcessor.taskByMovieDataType((MovieDataType) dataType);
|
||||
return movieProcessor.taskByDataType((MovieDataType) dataType);
|
||||
} else if (dataType instanceof MovieListDataType) {
|
||||
return movieListProcessor.taskByMovieListDataType((MovieListDataType) dataType);
|
||||
return movieListProcessor.taskByDataType((MovieListDataType) dataType);
|
||||
} else if (dataType instanceof TaglineListDataType) {
|
||||
return taglineListProcessor.taskByTaglineListDataType((TaglineListDataType) dataType);
|
||||
return taglineListProcessor.taskByDataType((TaglineListDataType) dataType);
|
||||
} else if (dataType instanceof TaglineDataType) {
|
||||
return taglineProcessor.taskByTaglineDataType((TaglineDataType) dataType);
|
||||
return taglineProcessor.taskByDataType((TaglineDataType) dataType);
|
||||
} else if (dataType instanceof AkaListDataType) {
|
||||
return akaListProcessor.taskByDataType((AkaListDataType) dataType);
|
||||
} else if (dataType instanceof AkaDataType) {
|
||||
return akaProcessor.taskByDataType((AkaDataType) dataType);
|
||||
} else {
|
||||
throw new DataTypeNotSupportedException(String.format("DataType %s is not supported by API v1_0!", dataType.getClass().getName()));
|
||||
}
|
||||
|
@ -179,19 +187,27 @@ public class ApiFactory_1_0 implements ApiFactory {
|
|||
public void fillUpImdbObject(ImdbObject imdbObject, Task task) {
|
||||
if (imdbObject instanceof Movie) {
|
||||
if (task.getDataType() instanceof MovieDataType) {
|
||||
movieProcessor.fillUpMovie((Movie) imdbObject, task);
|
||||
movieProcessor.fillUpImdbObject((Movie) imdbObject, task);
|
||||
}
|
||||
} else if (imdbObject instanceof MovieList) {
|
||||
if (task.getDataType() instanceof MovieListDataType) {
|
||||
movieListProcessor.fillUpMovieList((MovieList) imdbObject, task);
|
||||
movieListProcessor.fillUpImdbObject((MovieList) imdbObject, task);
|
||||
}
|
||||
} else if (imdbObject instanceof TaglineList) {
|
||||
if (task.getDataType() instanceof TaglineListDataType) {
|
||||
taglineListProcessor.fillUpTaglineList((TaglineList) imdbObject, task);
|
||||
taglineListProcessor.fillUpImdbObject((TaglineList) imdbObject, task);
|
||||
}
|
||||
} else if (imdbObject instanceof Tagline) {
|
||||
if (task.getDataType() instanceof TaglineDataType) {
|
||||
taglineProcessor.fillUpTagline((Tagline) imdbObject, task);
|
||||
taglineProcessor.fillUpImdbObject((Tagline) imdbObject, task);
|
||||
}
|
||||
} else if (imdbObject instanceof AkaList) {
|
||||
if (task.getDataType() instanceof AkaListDataType) {
|
||||
akaListProcessor.fillUpImdbObject((AkaList) imdbObject, task);
|
||||
}
|
||||
} else if (imdbObject instanceof Aka) {
|
||||
if (task.getDataType() instanceof AkaDataType) {
|
||||
akaProcessor.fillUpImdbObject((Aka) imdbObject, task);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -230,4 +246,12 @@ public class ApiFactory_1_0 implements ApiFactory {
|
|||
/**
 * Package-private accessor for the tagline-list processor held by this factory.
 *
 * @return the {@code taglineListProcessor} field
 */
TaglineListProcessor_1_0 getTaglineListProcessor() {
|
||||
return taglineListProcessor;
|
||||
}
|
||||
|
||||
/**
 * Package-private accessor for the aka-list processor held by this factory.
 *
 * @return the {@code akaListProcessor} field
 */
AkaListProcessor_1_0 getAkaListProcessor() {
|
||||
return akaListProcessor;
|
||||
}
|
||||
|
||||
/**
 * Package-private accessor for the aka processor held by this factory.
 *
 * @return the {@code akaProcessor} field
 */
AkaProcessor_1_0 getAkaProcessor() {
|
||||
return akaProcessor;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,13 +11,14 @@ import java.util.List;
|
|||
/**
|
||||
* @author boyko_vn at 15.01.2019
|
||||
*/
|
||||
public class MovieListProcessor_1_0 extends AbstractApiProcessor_1_0 {
|
||||
public class MovieListProcessor_1_0 extends AbstractApiProcessor_1_0<MovieList, MovieListDataType> {
|
||||
|
||||
public MovieListProcessor_1_0(ApiFactory_1_0 apiFactory) {
|
||||
super(apiFactory);
|
||||
}
|
||||
|
||||
Task taskByMovieListDataType(MovieListDataType movieListDataType) {
|
||||
@Override
|
||||
Task taskByDataType(MovieListDataType movieListDataType) {
|
||||
Task t = new Task();
|
||||
t.setDataType(movieListDataType);
|
||||
switch (movieListDataType) {
|
||||
|
@ -40,12 +41,12 @@ public class MovieListProcessor_1_0 extends AbstractApiProcessor_1_0 {
|
|||
if (!getApiFactory().getMovieDataTypeSet().contains(MovieDataType.ID)) {
|
||||
getApiFactory().getMovieDataTypeSet().add(MovieDataType.ID);
|
||||
}
|
||||
Task movieTask = getApiFactory().getMovieProcessor().taskByMovieDataType(MovieDataType.ID)
|
||||
Task movieTask = getApiFactory().getMovieProcessor().taskByDataType(MovieDataType.ID)
|
||||
.setParentTask(task)
|
||||
.setUrl(String.format("%s%s", ApiFactory_1_0.URL_MAIN, link.attr("href")));
|
||||
task.getNestedTasks().add(movieTask);
|
||||
getApiFactory().getMovieDataTypeSet().forEach(movieDataType ->
|
||||
movieTask.getNestedTasks().add(getApiFactory().getMovieProcessor().taskByMovieDataType(movieDataType)
|
||||
movieTask.getNestedTasks().add(getApiFactory().getMovieProcessor().taskByDataType(movieDataType)
|
||||
.setParentTask(movieTask)
|
||||
.setUrl(String.format("%s%s", ApiFactory_1_0.URL_MAIN, link.attr("href")))));
|
||||
}
|
||||
|
@ -55,7 +56,8 @@ public class MovieListProcessor_1_0 extends AbstractApiProcessor_1_0 {
|
|||
return t;
|
||||
}
|
||||
|
||||
void fillUpMovieList(MovieList movieList, Task task) {
|
||||
@Override
|
||||
void fillUpImdbObject(MovieList movieList, Task task) {
|
||||
switch ((MovieListDataType) task.getDataType()) {
|
||||
case ELEMENTS:
|
||||
movieList.setUrl(task.getUrl());
|
||||
|
|
|
@ -17,13 +17,14 @@ import java.util.regex.Matcher;
|
|||
/**
|
||||
* @author boyko_vn at 15.01.2019
|
||||
*/
|
||||
public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 {
|
||||
public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0<Movie, MovieDataType> {
|
||||
|
||||
public MovieProcessor_1_0(ApiFactory_1_0 apiFactory) {
|
||||
super(apiFactory);
|
||||
}
|
||||
|
||||
public Task taskByMovieDataType(MovieDataType movieDataType) {
|
||||
@Override
|
||||
Task taskByDataType(MovieDataType movieDataType) {
|
||||
Task t = new Task();
|
||||
t.setDataType(movieDataType);
|
||||
switch (movieDataType) {
|
||||
|
@ -189,7 +190,7 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 {
|
|||
if (task.getCssSelectorResult().size() > 0) {
|
||||
Elements links = task.getCssSelectorResult().first().parent().select("span > a:contains(See more)");
|
||||
if (links.size() > 0) {
|
||||
Task newTask = getApiFactory().getTaglineListProcessor().taskByTaglineListDataType(TaglineListDataType.ELEMENTS)
|
||||
Task newTask = getApiFactory().getTaglineListProcessor().taskByDataType(TaglineListDataType.ELEMENTS)
|
||||
.setParentTask(task)
|
||||
.setUrl(String.format("%s%s", ApiFactory_1_0.URL_MAIN, links.first().attr("href")));
|
||||
task.getNestedTasks().add(newTask);
|
||||
|
@ -202,7 +203,8 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 {
|
|||
}
|
||||
|
||||
|
||||
void fillUpMovie(Movie movie, Task task) {
|
||||
@Override
|
||||
void fillUpImdbObject(Movie movie, Task task) {
|
||||
boolean isDone = false;
|
||||
switch ((MovieDataType) task.getDataType()) {
|
||||
case ID:
|
||||
|
|
|
@ -11,13 +11,14 @@ import java.util.concurrent.atomic.AtomicInteger;
|
|||
/**
|
||||
* @author boyko_vn at 15.01.2019
|
||||
*/
|
||||
public class TaglineListProcessor_1_0 extends AbstractApiProcessor_1_0 {
|
||||
public class TaglineListProcessor_1_0 extends AbstractApiProcessor_1_0<TaglineList, TaglineListDataType> {
|
||||
|
||||
public TaglineListProcessor_1_0(ApiFactory_1_0 apiFactory) {
|
||||
super(apiFactory);
|
||||
}
|
||||
|
||||
Task taskByTaglineListDataType(TaglineListDataType taglineListDataType) {
|
||||
@Override
|
||||
Task taskByDataType(TaglineListDataType taglineListDataType) {
|
||||
Task t = new Task();
|
||||
t.setDataType(taglineListDataType);
|
||||
switch (taglineListDataType) {
|
||||
|
@ -26,13 +27,13 @@ public class TaglineListProcessor_1_0 extends AbstractApiProcessor_1_0 {
|
|||
AtomicInteger i = new AtomicInteger(0);
|
||||
t.setPostprocess((task, s) -> {
|
||||
for (Element element : task.getCssSelectorResult()) {
|
||||
Task newTaskId = getApiFactory().getTaglineProcessor().taskByTaglineDataType(TaglineDataType.ID)
|
||||
Task newTaskId = getApiFactory().getTaglineProcessor().taskByDataType(TaglineDataType.ID)
|
||||
.setParentTask(task)
|
||||
.setUrl(task.getUrl())
|
||||
.setResult(String.format("%d", i.getAndAdd(1)));
|
||||
task.getNestedTasks().add(newTaskId);
|
||||
|
||||
Task newTaskText = getApiFactory().getTaglineProcessor().taskByTaglineDataType(TaglineDataType.TEXT)
|
||||
Task newTaskText = getApiFactory().getTaglineProcessor().taskByDataType(TaglineDataType.TEXT)
|
||||
.setParentTask(task)
|
||||
.setUrl(task.getUrl())
|
||||
.setResult(element.text());
|
||||
|
@ -44,7 +45,8 @@ public class TaglineListProcessor_1_0 extends AbstractApiProcessor_1_0 {
|
|||
return t;
|
||||
}
|
||||
|
||||
void fillUpTaglineList(TaglineList taglineList, Task task) {
|
||||
@Override
|
||||
void fillUpImdbObject(TaglineList taglineList, Task task) {
|
||||
switch ((TaglineListDataType) task.getDataType()) {
|
||||
case ELEMENTS:
|
||||
taglineList.setUrl(task.getUrl());
|
||||
|
|
|
@ -7,13 +7,14 @@ import ru.bvn13.imdbspider.spider.tasker.Task;
|
|||
/**
|
||||
* @author boyko_vn at 15.01.2019
|
||||
*/
|
||||
public class TaglineProcessor_1_0 extends AbstractApiProcessor_1_0 {
|
||||
public class TaglineProcessor_1_0 extends AbstractApiProcessor_1_0<Tagline, TaglineDataType> {
|
||||
|
||||
public TaglineProcessor_1_0(ApiFactory_1_0 apiFactory) {
|
||||
super(apiFactory);
|
||||
}
|
||||
|
||||
Task taskByTaglineDataType(TaglineDataType taglineDataType) {
|
||||
@Override
|
||||
Task taskByDataType(TaglineDataType taglineDataType) {
|
||||
Task t = new Task();
|
||||
t.setDataType(taglineDataType);
|
||||
switch (taglineDataType) {
|
||||
|
@ -29,8 +30,8 @@ public class TaglineProcessor_1_0 extends AbstractApiProcessor_1_0 {
|
|||
return t;
|
||||
}
|
||||
|
||||
|
||||
void fillUpTagline(Tagline tagline, Task task) {
|
||||
@Override
|
||||
void fillUpImdbObject(Tagline tagline, Task task) {
|
||||
switch ((TaglineDataType) task.getDataType()) {
|
||||
case ID:
|
||||
tagline.setUrl(task.getUrl());
|
||||
|
|
Loading…
Reference in New Issue