start working on retrieving akas, refactoring

master
Vyacheslav N. Boyko 2019-01-17 00:07:28 +03:00
parent d62a55df85
commit c133ad77bb
12 changed files with 208 additions and 27 deletions

View File

@ -0,0 +1,24 @@
package ru.bvn13.imdbspider.imdb;
import java.util.EnumSet;
/**
 * IMDb object holding a single title string.
 * NOTE(review): presumably models one "also known as" (alternative title) entry - confirm.
 *
 * Created by bvn13 on 16.01.2019.
 */
public class Aka extends ImdbObject<AkaDataType> {

    /** Title text; stays {@code null} until {@link #setTitle(String)} is called. */
    private String title;

    /**
     * Stores the title of this entry.
     *
     * @param title the title text to keep
     */
    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * @return the title previously stored via {@link #setTitle(String)}, or {@code null} if none was set
     */
    public String getTitle() {
        return this.title;
    }

    /**
     * Assigns an empty {@link EnumSet} of {@link AkaDataType} to {@code retrievedDataTypes}.
     */
    @Override
    protected void initRetrievedDataTypes() {
        this.retrievedDataTypes = EnumSet.noneOf(AkaDataType.class);
    }
}

View File

@ -0,0 +1,22 @@
package ru.bvn13.imdbspider.imdb;
/**
 * Data types available for an {@link Aka}, each bound to a fixed string value.
 * NOTE(review): presumably these select which pieces of an Aka get retrieved - confirm against callers.
 *
 * Created by bvn13 on 16.01.2019.
 */
public enum AkaDataType implements DataType {

    ID("id"),
    TITLE("title");

    /** String value of this constant; assigned once in the constructor and never changed. */
    private final String value;

    /**
     * @param value string value bound to the constant
     */
    AkaDataType(String value) {
        this.value = value;
    }

    /**
     * @return the string value bound to this constant ({@code "id"} or {@code "title"})
     */
    @Override
    public String get() {
        return value;
    }
}

View File

@ -0,0 +1,25 @@
package ru.bvn13.imdbspider.imdb;
import java.util.EnumSet;
import java.util.List;
/**
 * IMDb object that carries a list of {@link Aka} entries.
 *
 * Created by bvn13 on 16.01.2019.
 */
public class AkaList extends ImdbObject<AkaListDataType> {

    /** Contained entries; stays {@code null} until {@link #setAkas(List)} is called. */
    private List<Aka> akas;

    /**
     * Replaces the contained entries. The list is stored as given (no copy is made).
     *
     * @param akas the entries to keep
     */
    public void setAkas(List<Aka> akas) {
        this.akas = akas;
    }

    /**
     * @return the list previously stored via {@link #setAkas(List)}, or {@code null} if none was set
     */
    public List<Aka> getAkas() {
        return this.akas;
    }

    /**
     * Assigns an empty {@link EnumSet} of {@link AkaListDataType} to {@code retrievedDataTypes}.
     */
    @Override
    protected void initRetrievedDataTypes() {
        this.retrievedDataTypes = EnumSet.noneOf(AkaListDataType.class);
    }
}

View File

@ -0,0 +1,21 @@
package ru.bvn13.imdbspider.imdb;
/**
 * Data types available for an {@link AkaList}, each bound to a fixed string value.
 *
 * Created by bvn13 on 16.01.2019.
 */
public enum AkaListDataType implements DataType {

    ELEMENTS("elements");

    /** String value of this constant; assigned once in the constructor and never changed. */
    private final String value;

    /**
     * @param value string value bound to the constant
     */
    AkaListDataType(String value) {
        this.value = value;
    }

    /**
     * @return the string value bound to this constant ({@code "elements"})
     */
    @Override
    public String get() {
        return value;
    }
}

View File

@ -1,9 +1,13 @@
package ru.bvn13.imdbspider.spider.api.v1_0;
import ru.bvn13.imdbspider.imdb.DataType;
import ru.bvn13.imdbspider.imdb.ImdbObject;
import ru.bvn13.imdbspider.spider.tasker.Task;
/**
* @author boyko_vn at 15.01.2019
*/
abstract public class AbstractApiProcessor_1_0 {
abstract public class AbstractApiProcessor_1_0<C extends ImdbObject, D extends Enum<?> & DataType> {
protected boolean isDebug;
@ -20,4 +24,8 @@ abstract public class AbstractApiProcessor_1_0 {
/**
 * Sets the debug flag of this processor.
 *
 * @param debug new value for the {@code isDebug} field
 */
public void setDebug(boolean debug) {
    this.isDebug = debug;
}
/**
 * Applies the data carried by {@code task} to {@code imdbObject}.
 * Each concrete processor implements this for its own object type {@code C}.
 *
 * @param imdbObject the object to populate
 * @param task the task whose data is applied to the object
 */
abstract void fillUpImdbObject(C imdbObject, Task task);
/**
 * Produces the {@link Task} that corresponds to the given data type.
 * Each concrete processor implements this for its own data type {@code D}.
 *
 * @param dataType the data type a task is requested for
 * @return the task for {@code dataType}
 */
abstract Task taskByDataType(D dataType);
}

View File

@ -0,0 +1,26 @@
package ru.bvn13.imdbspider.spider.api.v1_0;
import ru.bvn13.imdbspider.imdb.AkaList;
import ru.bvn13.imdbspider.imdb.AkaListDataType;
import ru.bvn13.imdbspider.spider.tasker.Task;
/**
 * API v1_0 processor bound to {@link AkaList} objects and {@link AkaListDataType} data types.
 * Both operations are placeholders at the moment: no task is built and no data is copied.
 *
 * Created by bvn13 on 16.01.2019.
 */
public class AkaListProcessor_1_0 extends AbstractApiProcessor_1_0<AkaList, AkaListDataType> {

    /**
     * @param apiFactory factory handed to the superclass constructor
     */
    public AkaListProcessor_1_0(ApiFactory_1_0 apiFactory) {
        super(apiFactory);
    }

    /**
     * Currently a no-op: {@code imdbObject} is left untouched.
     *
     * @param imdbObject the list object that would be populated
     * @param task the task whose data would be applied
     */
    @Override
    public void fillUpImdbObject(AkaList imdbObject, Task task) {
        // TODO: not implemented yet
    }

    /**
     * Currently a stub.
     *
     * @param dataType the requested data type (ignored for now)
     * @return always {@code null}
     */
    @Override
    Task taskByDataType(AkaListDataType dataType) {
        // TODO: not implemented yet
        return null;
    }
}

View File

@ -0,0 +1,24 @@
package ru.bvn13.imdbspider.spider.api.v1_0;
import ru.bvn13.imdbspider.imdb.Aka;
import ru.bvn13.imdbspider.imdb.AkaDataType;
import ru.bvn13.imdbspider.spider.tasker.Task;
/**
 * API v1_0 processor bound to {@link Aka} objects and {@link AkaDataType} data types.
 * Both operations are placeholders at the moment: no task is built and no data is copied.
 *
 * Created by bvn13 on 16.01.2019.
 */
public class AkaProcessor_1_0 extends AbstractApiProcessor_1_0<Aka, AkaDataType> {

    /**
     * @param apiFactory factory handed to the superclass constructor
     */
    public AkaProcessor_1_0(ApiFactory_1_0 apiFactory) {
        super(apiFactory);
    }

    /**
     * Currently a no-op: {@code imdbObject} is left untouched.
     *
     * @param imdbObject the object that would be populated
     * @param task the task whose data would be applied
     */
    @Override
    void fillUpImdbObject(Aka imdbObject, Task task) {
        // TODO: not implemented yet
    }

    /**
     * Currently a stub.
     *
     * @param dataType the requested data type (ignored for now)
     * @return always {@code null}
     */
    @Override
    Task taskByDataType(AkaDataType dataType) {
        // TODO: not implemented yet
        return null;
    }
}

View File

@ -120,6 +120,8 @@ public class ApiFactory_1_0 implements ApiFactory {
private MovieProcessor_1_0 movieProcessor;
private TaglineListProcessor_1_0 taglineListProcessor;
private TaglineProcessor_1_0 taglineProcessor;
private AkaListProcessor_1_0 akaListProcessor;
private AkaProcessor_1_0 akaProcessor;
public ApiFactory_1_0(HtmlProcessor htmlProcessor) {
this.htmlProcessor = htmlProcessor;
@ -128,6 +130,8 @@ public class ApiFactory_1_0 implements ApiFactory {
this.movieProcessor = new MovieProcessor_1_0(this);
this.taglineListProcessor = new TaglineListProcessor_1_0(this);
this.taglineProcessor = new TaglineProcessor_1_0(this);
this.akaListProcessor = new AkaListProcessor_1_0(this);
this.akaProcessor = new AkaProcessor_1_0(this);
}
@Override
@ -163,13 +167,17 @@ public class ApiFactory_1_0 implements ApiFactory {
@Override
public Task taskByDataType(DataType dataType) throws DataTypeNotSupportedException {
if (dataType instanceof MovieDataType) {
return movieProcessor.taskByMovieDataType((MovieDataType) dataType);
return movieProcessor.taskByDataType((MovieDataType) dataType);
} else if (dataType instanceof MovieListDataType) {
return movieListProcessor.taskByMovieListDataType((MovieListDataType) dataType);
return movieListProcessor.taskByDataType((MovieListDataType) dataType);
} else if (dataType instanceof TaglineListDataType) {
return taglineListProcessor.taskByTaglineListDataType((TaglineListDataType) dataType);
return taglineListProcessor.taskByDataType((TaglineListDataType) dataType);
} else if (dataType instanceof TaglineDataType) {
return taglineProcessor.taskByTaglineDataType((TaglineDataType) dataType);
return taglineProcessor.taskByDataType((TaglineDataType) dataType);
} else if (dataType instanceof AkaListDataType) {
return akaListProcessor.taskByDataType((AkaListDataType) dataType);
} else if (dataType instanceof AkaDataType) {
return akaProcessor.taskByDataType((AkaDataType) dataType);
} else {
throw new DataTypeNotSupportedException(String.format("DataType %s is not supported by API v1_0!", dataType.getClass().getName()));
}
@ -179,19 +187,27 @@ public class ApiFactory_1_0 implements ApiFactory {
public void fillUpImdbObject(ImdbObject imdbObject, Task task) {
if (imdbObject instanceof Movie) {
if (task.getDataType() instanceof MovieDataType) {
movieProcessor.fillUpMovie((Movie) imdbObject, task);
movieProcessor.fillUpImdbObject((Movie) imdbObject, task);
}
} else if (imdbObject instanceof MovieList) {
if (task.getDataType() instanceof MovieListDataType) {
movieListProcessor.fillUpMovieList((MovieList) imdbObject, task);
movieListProcessor.fillUpImdbObject((MovieList) imdbObject, task);
}
} else if (imdbObject instanceof TaglineList) {
if (task.getDataType() instanceof TaglineListDataType) {
taglineListProcessor.fillUpTaglineList((TaglineList) imdbObject, task);
taglineListProcessor.fillUpImdbObject((TaglineList) imdbObject, task);
}
} else if (imdbObject instanceof Tagline) {
if (task.getDataType() instanceof TaglineDataType) {
taglineProcessor.fillUpTagline((Tagline) imdbObject, task);
taglineProcessor.fillUpImdbObject((Tagline) imdbObject, task);
}
} else if (imdbObject instanceof AkaList) {
if (task.getDataType() instanceof AkaListDataType) {
akaListProcessor.fillUpImdbObject((AkaList) imdbObject, task);
}
} else if (imdbObject instanceof Aka) {
if (task.getDataType() instanceof AkaDataType) {
akaProcessor.fillUpImdbObject((Aka) imdbObject, task);
}
}
}
@ -230,4 +246,12 @@ public class ApiFactory_1_0 implements ApiFactory {
/**
 * @return the {@link TaglineListProcessor_1_0} instance held by this factory
 */
TaglineListProcessor_1_0 getTaglineListProcessor() {
return taglineListProcessor;
}
/**
 * @return the {@link AkaListProcessor_1_0} instance held by this factory
 */
AkaListProcessor_1_0 getAkaListProcessor() {
return akaListProcessor;
}
/**
 * @return the {@link AkaProcessor_1_0} instance held by this factory
 */
AkaProcessor_1_0 getAkaProcessor() {
return akaProcessor;
}
}

View File

@ -11,13 +11,14 @@ import java.util.List;
/**
* @author boyko_vn at 15.01.2019
*/
public class MovieListProcessor_1_0 extends AbstractApiProcessor_1_0 {
public class MovieListProcessor_1_0 extends AbstractApiProcessor_1_0<MovieList, MovieListDataType> {
public MovieListProcessor_1_0(ApiFactory_1_0 apiFactory) {
super(apiFactory);
}
Task taskByMovieListDataType(MovieListDataType movieListDataType) {
@Override
Task taskByDataType(MovieListDataType movieListDataType) {
Task t = new Task();
t.setDataType(movieListDataType);
switch (movieListDataType) {
@ -40,12 +41,12 @@ public class MovieListProcessor_1_0 extends AbstractApiProcessor_1_0 {
if (!getApiFactory().getMovieDataTypeSet().contains(MovieDataType.ID)) {
getApiFactory().getMovieDataTypeSet().add(MovieDataType.ID);
}
Task movieTask = getApiFactory().getMovieProcessor().taskByMovieDataType(MovieDataType.ID)
Task movieTask = getApiFactory().getMovieProcessor().taskByDataType(MovieDataType.ID)
.setParentTask(task)
.setUrl(String.format("%s%s", ApiFactory_1_0.URL_MAIN, link.attr("href")));
task.getNestedTasks().add(movieTask);
getApiFactory().getMovieDataTypeSet().forEach(movieDataType ->
movieTask.getNestedTasks().add(getApiFactory().getMovieProcessor().taskByMovieDataType(movieDataType)
movieTask.getNestedTasks().add(getApiFactory().getMovieProcessor().taskByDataType(movieDataType)
.setParentTask(movieTask)
.setUrl(String.format("%s%s", ApiFactory_1_0.URL_MAIN, link.attr("href")))));
}
@ -55,7 +56,8 @@ public class MovieListProcessor_1_0 extends AbstractApiProcessor_1_0 {
return t;
}
void fillUpMovieList(MovieList movieList, Task task) {
@Override
void fillUpImdbObject(MovieList movieList, Task task) {
switch ((MovieListDataType) task.getDataType()) {
case ELEMENTS:
movieList.setUrl(task.getUrl());

View File

@ -17,13 +17,14 @@ import java.util.regex.Matcher;
/**
* @author boyko_vn at 15.01.2019
*/
public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 {
public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0<Movie, MovieDataType> {
public MovieProcessor_1_0(ApiFactory_1_0 apiFactory) {
super(apiFactory);
}
public Task taskByMovieDataType(MovieDataType movieDataType) {
@Override
Task taskByDataType(MovieDataType movieDataType) {
Task t = new Task();
t.setDataType(movieDataType);
switch (movieDataType) {
@ -189,7 +190,7 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 {
if (task.getCssSelectorResult().size() > 0) {
Elements links = task.getCssSelectorResult().first().parent().select("span > a:contains(See more)");
if (links.size() > 0) {
Task newTask = getApiFactory().getTaglineListProcessor().taskByTaglineListDataType(TaglineListDataType.ELEMENTS)
Task newTask = getApiFactory().getTaglineListProcessor().taskByDataType(TaglineListDataType.ELEMENTS)
.setParentTask(task)
.setUrl(String.format("%s%s", ApiFactory_1_0.URL_MAIN, links.first().attr("href")));
task.getNestedTasks().add(newTask);
@ -202,7 +203,8 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 {
}
void fillUpMovie(Movie movie, Task task) {
@Override
void fillUpImdbObject(Movie movie, Task task) {
boolean isDone = false;
switch ((MovieDataType) task.getDataType()) {
case ID:

View File

@ -11,13 +11,14 @@ import java.util.concurrent.atomic.AtomicInteger;
/**
* @author boyko_vn at 15.01.2019
*/
public class TaglineListProcessor_1_0 extends AbstractApiProcessor_1_0 {
public class TaglineListProcessor_1_0 extends AbstractApiProcessor_1_0<TaglineList, TaglineListDataType> {
public TaglineListProcessor_1_0(ApiFactory_1_0 apiFactory) {
super(apiFactory);
}
Task taskByTaglineListDataType(TaglineListDataType taglineListDataType) {
@Override
Task taskByDataType(TaglineListDataType taglineListDataType) {
Task t = new Task();
t.setDataType(taglineListDataType);
switch (taglineListDataType) {
@ -26,13 +27,13 @@ public class TaglineListProcessor_1_0 extends AbstractApiProcessor_1_0 {
AtomicInteger i = new AtomicInteger(0);
t.setPostprocess((task, s) -> {
for (Element element : task.getCssSelectorResult()) {
Task newTaskId = getApiFactory().getTaglineProcessor().taskByTaglineDataType(TaglineDataType.ID)
Task newTaskId = getApiFactory().getTaglineProcessor().taskByDataType(TaglineDataType.ID)
.setParentTask(task)
.setUrl(task.getUrl())
.setResult(String.format("%d", i.getAndAdd(1)));
task.getNestedTasks().add(newTaskId);
Task newTaskText = getApiFactory().getTaglineProcessor().taskByTaglineDataType(TaglineDataType.TEXT)
Task newTaskText = getApiFactory().getTaglineProcessor().taskByDataType(TaglineDataType.TEXT)
.setParentTask(task)
.setUrl(task.getUrl())
.setResult(element.text());
@ -44,7 +45,8 @@ public class TaglineListProcessor_1_0 extends AbstractApiProcessor_1_0 {
return t;
}
void fillUpTaglineList(TaglineList taglineList, Task task) {
@Override
void fillUpImdbObject(TaglineList taglineList, Task task) {
switch ((TaglineListDataType) task.getDataType()) {
case ELEMENTS:
taglineList.setUrl(task.getUrl());

View File

@ -7,13 +7,14 @@ import ru.bvn13.imdbspider.spider.tasker.Task;
/**
* @author boyko_vn at 15.01.2019
*/
public class TaglineProcessor_1_0 extends AbstractApiProcessor_1_0 {
public class TaglineProcessor_1_0 extends AbstractApiProcessor_1_0<Tagline, TaglineDataType> {
public TaglineProcessor_1_0(ApiFactory_1_0 apiFactory) {
super(apiFactory);
}
Task taskByTaglineDataType(TaglineDataType taglineDataType) {
@Override
Task taskByDataType(TaglineDataType taglineDataType) {
Task t = new Task();
t.setDataType(taglineDataType);
switch (taglineDataType) {
@ -29,8 +30,8 @@ public class TaglineProcessor_1_0 extends AbstractApiProcessor_1_0 {
return t;
}
void fillUpTagline(Tagline tagline, Task task) {
@Override
void fillUpImdbObject(Tagline tagline, Task task) {
switch ((TaglineDataType) task.getDataType()) {
case ID:
tagline.setUrl(task.getUrl());