mirror of https://github.com/bvn13/imdb-spider.git
added debug mode for Travis-CI test
parent
85e7ff5cf3
commit
e1eb8e6c5c
|
@ -20,6 +20,8 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
public class ImdbSpider {
|
public class ImdbSpider {
|
||||||
|
|
||||||
|
private boolean isDebug;
|
||||||
|
|
||||||
private Manager manager;
|
private Manager manager;
|
||||||
|
|
||||||
private ApiFactory apiFactory;
|
private ApiFactory apiFactory;
|
||||||
|
@ -38,6 +40,17 @@ public class ImdbSpider {
|
||||||
manager = new Manager();
|
manager = new Manager();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isDebug() {
|
||||||
|
return isDebug;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ImdbSpider setDebug(boolean debug) {
|
||||||
|
isDebug = debug;
|
||||||
|
manager.setDebug(isDebug);
|
||||||
|
apiFactory.setDebug(isDebug);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
public ImdbSpider addHttpRequestHeader(String key, String value) {
|
public ImdbSpider addHttpRequestHeader(String key, String value) {
|
||||||
manager.addHttpRequestHeader(key, value);
|
manager.addHttpRequestHeader(key, value);
|
||||||
return this;
|
return this;
|
||||||
|
|
|
@ -7,6 +7,8 @@ import java.util.EnumSet;
|
||||||
*/
|
*/
|
||||||
public abstract class ImdbObject<DT extends Enum<DT> & DataType> {
|
public abstract class ImdbObject<DT extends Enum<DT> & DataType> {
|
||||||
|
|
||||||
|
protected String html;
|
||||||
|
|
||||||
protected EnumSet<DT> retrievedDataTypes;
|
protected EnumSet<DT> retrievedDataTypes;
|
||||||
|
|
||||||
protected String id;
|
protected String id;
|
||||||
|
@ -16,6 +18,14 @@ public abstract class ImdbObject<DT extends Enum<DT> & DataType> {
|
||||||
this.initRetrievedDataTypes();
|
this.initRetrievedDataTypes();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getHtml() {
|
||||||
|
return html;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setHtml(String html) {
|
||||||
|
this.html = html;
|
||||||
|
}
|
||||||
|
|
||||||
protected abstract void initRetrievedDataTypes();
|
protected abstract void initRetrievedDataTypes();
|
||||||
|
|
||||||
public boolean isDataTypeRetrieved(DT dataType) {
|
public boolean isDataTypeRetrieved(DT dataType) {
|
||||||
|
|
|
@ -16,6 +16,8 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
public interface ApiFactory {
|
public interface ApiFactory {
|
||||||
|
|
||||||
|
void setDebug(boolean debug);
|
||||||
|
|
||||||
List<Task> createTasksForSearchMovieByTitle(String title, int maxCount, EnumSet<MovieDataType> dataTypes) throws ImdbSpiderException;
|
List<Task> createTasksForSearchMovieByTitle(String title, int maxCount, EnumSet<MovieDataType> dataTypes) throws ImdbSpiderException;
|
||||||
|
|
||||||
Task taskByDataType(DataType dataType) throws DataTypeNotSupportedException;
|
Task taskByDataType(DataType dataType) throws DataTypeNotSupportedException;
|
||||||
|
|
|
@ -5,6 +5,8 @@ package ru.bvn13.imdbspider.spider.api.v1_0;
|
||||||
*/
|
*/
|
||||||
abstract public class AbstractApiProcessor_1_0 {
|
abstract public class AbstractApiProcessor_1_0 {
|
||||||
|
|
||||||
|
protected boolean isDebug;
|
||||||
|
|
||||||
private ApiFactory_1_0 apiFactory;
|
private ApiFactory_1_0 apiFactory;
|
||||||
|
|
||||||
public AbstractApiProcessor_1_0(ApiFactory_1_0 apiFactory) {
|
public AbstractApiProcessor_1_0(ApiFactory_1_0 apiFactory) {
|
||||||
|
@ -14,4 +16,8 @@ abstract public class AbstractApiProcessor_1_0 {
|
||||||
public ApiFactory_1_0 getApiFactory() {
|
public ApiFactory_1_0 getApiFactory() {
|
||||||
return apiFactory;
|
return apiFactory;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setDebug(boolean debug) {
|
||||||
|
isDebug = debug;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,6 +25,8 @@ import java.util.regex.Pattern;
|
||||||
*/
|
*/
|
||||||
public class ApiFactory_1_0 implements ApiFactory {
|
public class ApiFactory_1_0 implements ApiFactory {
|
||||||
|
|
||||||
|
private boolean isDebug;
|
||||||
|
|
||||||
static final String URL_MAIN = "https://www.imdb.com";
|
static final String URL_MAIN = "https://www.imdb.com";
|
||||||
|
|
||||||
private static final String URL_SEARCH_TITLE = "https://www.imdb.com/find?ref_=nv_sr_fn&q={{title}}&s=tt";
|
private static final String URL_SEARCH_TITLE = "https://www.imdb.com/find?ref_=nv_sr_fn&q={{title}}&s=tt";
|
||||||
|
@ -128,6 +130,12 @@ public class ApiFactory_1_0 implements ApiFactory {
|
||||||
this.taglineProcessor = new TaglineProcessor_1_0(this);
|
this.taglineProcessor = new TaglineProcessor_1_0(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setDebug(boolean debug) {
|
||||||
|
isDebug = debug;
|
||||||
|
this.movieProcessor.setDebug(isDebug);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<Task> createTasksForSearchMovieByTitle(String title, int maxCount, EnumSet<MovieDataType> dataTypes) throws ImdbSpiderException {
|
public List<Task> createTasksForSearchMovieByTitle(String title, int maxCount, EnumSet<MovieDataType> dataTypes) throws ImdbSpiderException {
|
||||||
|
|
||||||
|
|
|
@ -191,6 +191,9 @@ public class MovieProcessor_1_0 extends AbstractApiProcessor_1_0 {
|
||||||
boolean isDone = false;
|
boolean isDone = false;
|
||||||
switch ((MovieDataType) task.getDataType()) {
|
switch ((MovieDataType) task.getDataType()) {
|
||||||
case ID:
|
case ID:
|
||||||
|
if (isDebug) {
|
||||||
|
movie.setHtml(task.getHtml());
|
||||||
|
}
|
||||||
movie.setUrl(task.getUrl());
|
movie.setUrl(task.getUrl());
|
||||||
movie.setId((String) task.getResult());
|
movie.setId((String) task.getResult());
|
||||||
isDone = true;
|
isDone = true;
|
||||||
|
|
|
@ -13,8 +13,18 @@ import java.util.concurrent.*;
|
||||||
*/
|
*/
|
||||||
public class Manager {
|
public class Manager {
|
||||||
|
|
||||||
|
private boolean isDebug;
|
||||||
|
|
||||||
private Map<String, String> httpRequestHeaders = new HashMap<>();
|
private Map<String, String> httpRequestHeaders = new HashMap<>();
|
||||||
|
|
||||||
|
public boolean isDebug() {
|
||||||
|
return isDebug;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDebug(boolean debug) {
|
||||||
|
isDebug = debug;
|
||||||
|
}
|
||||||
|
|
||||||
public void addHttpRequestHeader(String key, String value) {
|
public void addHttpRequestHeader(String key, String value) {
|
||||||
this.httpRequestHeaders.put(key, value);
|
this.httpRequestHeaders.put(key, value);
|
||||||
}
|
}
|
||||||
|
@ -38,6 +48,7 @@ public class Manager {
|
||||||
|
|
||||||
groupedTasks.entrySet().parallelStream().forEach(stringListEntry -> {
|
groupedTasks.entrySet().parallelStream().forEach(stringListEntry -> {
|
||||||
Worker w = new Worker(stringListEntry.getKey(), stringListEntry.getValue());
|
Worker w = new Worker(stringListEntry.getKey(), stringListEntry.getValue());
|
||||||
|
w.setDebug(isDebug);
|
||||||
try {
|
try {
|
||||||
w.run(httpRequestHeaders);
|
w.run(httpRequestHeaders);
|
||||||
} catch (HtmlExtractorException e) {
|
} catch (HtmlExtractorException e) {
|
||||||
|
|
|
@ -14,6 +14,7 @@ import java.util.function.BiConsumer;
|
||||||
public class Task {
|
public class Task {
|
||||||
|
|
||||||
private String url;
|
private String url;
|
||||||
|
private String html;
|
||||||
|
|
||||||
private DataType dataType;
|
private DataType dataType;
|
||||||
private String cssSelector;
|
private String cssSelector;
|
||||||
|
@ -58,6 +59,14 @@ public class Task {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getHtml() {
|
||||||
|
return html;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setHtml(String html) {
|
||||||
|
this.html = html;
|
||||||
|
}
|
||||||
|
|
||||||
public String getCssSelector() {
|
public String getCssSelector() {
|
||||||
return cssSelector;
|
return cssSelector;
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,16 +7,16 @@ import ru.bvn13.imdbspider.spider.extractor.HtmlExtractor;
|
||||||
import ru.bvn13.imdbspider.spider.processor.HtmlProcessor;
|
import ru.bvn13.imdbspider.spider.processor.HtmlProcessor;
|
||||||
import ru.bvn13.imdbspider.spider.processor.JsoupHtmlProcessor;
|
import ru.bvn13.imdbspider.spider.processor.JsoupHtmlProcessor;
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.*;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author boyko_vn at 09.01.2019
|
* @author boyko_vn at 09.01.2019
|
||||||
*/
|
*/
|
||||||
public class Worker {
|
public class Worker {
|
||||||
|
|
||||||
|
private boolean isDebug;
|
||||||
|
|
||||||
private final String url;
|
private final String url;
|
||||||
private final List<Task> tasks;
|
private final List<Task> tasks;
|
||||||
|
|
||||||
|
@ -31,12 +31,24 @@ public class Worker {
|
||||||
this.htmlProcessor = new JsoupHtmlProcessor();
|
this.htmlProcessor = new JsoupHtmlProcessor();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isDebug() {
|
||||||
|
return isDebug;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDebug(boolean debug) {
|
||||||
|
isDebug = debug;
|
||||||
|
}
|
||||||
|
|
||||||
public Boolean run(Map<String, String> httpRequestHeaders) throws HtmlExtractorException {
|
public Boolean run(Map<String, String> httpRequestHeaders) throws HtmlExtractorException {
|
||||||
|
|
||||||
final String html = htmlExtractor.getHtml(url, httpRequestHeaders);
|
final String html = htmlExtractor.getHtml(url, httpRequestHeaders);
|
||||||
|
|
||||||
tasks.parallelStream().forEach(task -> {
|
tasks.parallelStream().forEach(task -> {
|
||||||
|
|
||||||
|
if (isDebug) {
|
||||||
|
task.setHtml(html);
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (task.getCssSelector() != null && !task.getCssSelector().isEmpty()) {
|
if (task.getCssSelector() != null && !task.getCssSelector().isEmpty()) {
|
||||||
task.setCssSelectorResult(htmlProcessor.process(html, task.getCssSelector()));
|
task.setCssSelectorResult(htmlProcessor.process(html, task.getCssSelector()));
|
||||||
|
|
|
@ -31,6 +31,7 @@ public class MovieSearchTest {
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void initClass() {
|
public static void initClass() {
|
||||||
spider = ImdbSpider.withApi_1_0()
|
spider = ImdbSpider.withApi_1_0()
|
||||||
|
.setDebug(true)
|
||||||
.addHttpRequestHeader("Content-Language", "en-EN");
|
.addHttpRequestHeader("Content-Language", "en-EN");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -70,6 +71,8 @@ public class MovieSearchTest {
|
||||||
System.out.println("==========================================");
|
System.out.println("==========================================");
|
||||||
System.out.println("MOVIE: \n"+json);
|
System.out.println("MOVIE: \n"+json);
|
||||||
System.out.println("==========================================");
|
System.out.println("==========================================");
|
||||||
|
System.out.println("HTML: \n"+movie.getHtml());
|
||||||
|
System.out.println("==========================================");
|
||||||
|
|
||||||
assertTrue("Expected ID field presence", movie.isDataTypeRetrieved(MovieDataType.ID));
|
assertTrue("Expected ID field presence", movie.isDataTypeRetrieved(MovieDataType.ID));
|
||||||
assertTrue("Expected TITLE field presence", movie.isDataTypeRetrieved(MovieDataType.TITLE));
|
assertTrue("Expected TITLE field presence", movie.isDataTypeRetrieved(MovieDataType.TITLE));
|
||||||
|
@ -93,7 +96,7 @@ public class MovieSearchTest {
|
||||||
assertTrue("Expected TAGLINES field presence", movie.isDataTypeRetrieved(MovieDataType.TAGLINES));
|
assertTrue("Expected TAGLINES field presence", movie.isDataTypeRetrieved(MovieDataType.TAGLINES));
|
||||||
|
|
||||||
assertEquals("Expected that first in search result has ID = 0088247, but given: "+movie.getId(), "0088247", movie.getId());
|
assertEquals("Expected that first in search result has ID = 0088247, but given: "+movie.getId(), "0088247", movie.getId());
|
||||||
assertEquals("Expected movie name: The Terminator, but given: "+movie.getOriginalTitle(), "The Terminator", movie.getOriginalTitle());
|
assertEquals("Expected original movie name: The Terminator, but given: "+movie.getOriginalTitle(), "The Terminator", movie.getOriginalTitle());
|
||||||
assertEquals("Expected year = "+movie.getYear(), Integer.valueOf(1984), movie.getYear());
|
assertEquals("Expected year = "+movie.getYear(), Integer.valueOf(1984), movie.getYear());
|
||||||
|
|
||||||
assertEquals("Invalid storyline", TERMINATOR_STORYLINE, movie.getStoryline());
|
assertEquals("Invalid storyline", TERMINATOR_STORYLINE, movie.getStoryline());
|
||||||
|
|
Loading…
Reference in New Issue