From ec2b27a4902c743052ab586afaf31ceb09b20c84 Mon Sep 17 00:00:00 2001 From: "Vyacheslav N. Boyko" Date: Fri, 17 Nov 2017 01:22:18 +0300 Subject: [PATCH] implemented yandex search --- config.json | 5 + .../jircbot/Services/YandexSearchService.java | 248 ++++++++++++++++++ .../java/ru/bvn13/jircbot/bot/JircBot.java | 10 +- .../jircbot/config/JircBotConfiguration.java | 25 +- .../listeners/YandexSearchListener.java | 62 +++++ .../jircbot/model/YandexSearchSettings.java | 14 + 6 files changed, 355 insertions(+), 9 deletions(-) create mode 100644 src/main/java/ru/bvn13/jircbot/Services/YandexSearchService.java create mode 100644 src/main/java/ru/bvn13/jircbot/listeners/YandexSearchListener.java create mode 100644 src/main/java/ru/bvn13/jircbot/model/YandexSearchSettings.java diff --git a/config.json b/config.json index fbbae54..faf6769 100644 --- a/config.json +++ b/config.json @@ -20,6 +20,11 @@ "search" : { "uuid" : "YOUR_UUID", "appKey" : "YOUR_APP_KEY" + }, + "yaSearch" : { + "url" : "https://yandex.ru/search/xml?", + "user" : "USER_NAME", + "key" : "YOUR_KEY" } } } \ No newline at end of file diff --git a/src/main/java/ru/bvn13/jircbot/Services/YandexSearchService.java b/src/main/java/ru/bvn13/jircbot/Services/YandexSearchService.java new file mode 100644 index 0000000..f81fae4 --- /dev/null +++ b/src/main/java/ru/bvn13/jircbot/Services/YandexSearchService.java @@ -0,0 +1,248 @@ +package ru.bvn13.jircbot.Services; + +import lombok.Getter; +import lombok.Setter; +import org.springframework.stereotype.Service; +import org.xml.sax.Attributes; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; +import org.xml.sax.helpers.DefaultHandler; +import org.xml.sax.helpers.XMLReaderFactory; + +import java.io.CharArrayWriter; +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.net.URLEncoder; +import java.util.ArrayList; +import java.util.List; + +@Service +public class YandexSearchService { 
+ + public static final String ENC = "UTF-8"; + public static final String AND = "&"; + + public static class YaPage { + + public static final int ITEMS_PER_PAGE = 10; + + private String keyword; + private int pageNumber; + private List<YaItem> yaItems = new ArrayList<>(); + + /** + * Constructor + * @param keyword keyword for searching + * @param pageNumber number of page + */ + public YaPage(final String keyword, final int pageNumber) { + this.keyword = keyword; + this.pageNumber = pageNumber; + } + + public List<YaItem> getYaItems() { + return yaItems; + } + + /** + * Add one SERP item to collection (page) + * @param item one SERP item + */ + public void addYaItem(final YaItem item) { + + final int position = (pageNumber * ITEMS_PER_PAGE) + yaItems.size() + 1; + item.setPosition(position); + yaItems.add(item); + } + } + + public static class YaItem { + + private int position; + @Getter + private String url; + private String domain; + @Getter + private String title; + @Getter + private String description = ""; + private String passages = ""; + + /** + * Constructor + * @param url url of current item + */ + public YaItem(final String url) { + this.url = url; + } + + /* A set of getters for the private fields of this class goes here... 
*/ + + public void setPosition(final int position) { + this.position = position; + } + + public void setDomain(final String domain) { + this.domain = domain; + } + + public void setTitle(final String title) { + this.title = title; + } + + public void setDescription(final String description) { + this.description = description; + } + + public void addPassage(final String passage) { + passages += passage; + } + + @Override + public String toString() { + return "YaItem{" + + "position=" + position + + ", url='" + url + '\'' + + ", domain='" + domain + '\'' + + ", title='" + title + '\'' + + ", description='" + description + '\'' + + ", passages='" + passages + '\'' + + '}'; + } + } + + public static class YaHandler extends DefaultHandler { + + private static final String IGNORE_TAG = "hlword"; + + private final CharArrayWriter buffer = new CharArrayWriter(); + private YaItem currentItem; + private YaPage yaPage; + + /** + * Constructor + * @param yaPage yandex page that will be filled with SERP items + */ + public YaHandler(final YaPage yaPage) { + this.yaPage = yaPage; + } + + @Override + public void startElement( + final String uri, + final String localName, + final String qName, + final Attributes attr + ) throws SAXException { + super.startElement(uri, localName, qName, attr); + if (!IGNORE_TAG.equals(qName)) { + buffer.reset(); + } + } + + @Override + public void endElement( + final String uri, + final String localName, + final String qName + ) throws SAXException { + + super.endElement(uri, localName, qName); + if ("error".equals(qName)) { + throw new IllegalArgumentException("Bad request: " + buffer.toString()); + } else if ("url".equals(qName)) { + currentItem = new YaItem(buffer.toString()); + } else if ("domain".equals(qName) && currentItem != null) { + currentItem.setDomain(buffer.toString()); + } else if ("title".equals(qName) && currentItem != null) { + currentItem.setTitle(clearFromTags(buffer.toString())); + } else if ("headline".equals(qName) && 
currentItem != null) { + currentItem.setDescription(clearFromTags(buffer.toString())); + } else if ("passage".equals(qName) && currentItem != null) { + currentItem.addPassage(clearFromTags(buffer.toString())); + } else if ("group".equals(qName) && currentItem != null) { + yaPage.addYaItem(currentItem); + } + } + + @Override + public void characters(final char[] chars, final int start, final int length) + throws SAXException { + super.characters(chars, start, length); + buffer.write(chars, start, length); + } + + /** + * Clear text from unwanted tags (both the opening and the closing form of the ignored tag) + * @param text text to clear + * @return cleared text + */ + private String clearFromTags(final String text) { + return text.replaceAll("<" + IGNORE_TAG +">", "") + .replaceAll("</" + IGNORE_TAG + ">", ""); + } + } + + @Getter + @Setter + private String user; + + @Getter + @Setter + private String key; + + @Getter + @Setter + private String url; + + + + + /** + * Retrieve Yandex.XML response stream via GET request; every parameter value is URL-encoded + * @param query search query + * @param pageNumber number of search page + * @return Yandex.XML response stream (the caller is responsible for closing it) + * @throws IOException input/output exception + */ + public InputStream retrieveResponseViaGetRequest( + final String query, + final int pageNumber + ) throws IOException { + + final StringBuilder address = new StringBuilder(this.url); + address.append("user=").append(URLEncoder.encode(user, ENC)).append(AND) + .append("key=").append(URLEncoder.encode(key, ENC)).append(AND) + .append("query=").append(URLEncoder.encode(query, ENC)).append(AND) + .append("page=").append(pageNumber); + final URL url = new URL(address.toString()); + return url.openStream(); + } + + + /** + * Load parsed yandex page from Yandex.XML service + * @param query query for searching + * @param pageNumber number of page + * @return parsed result of searching + * @throws IOException input/output exception + * @throws SAXException parsing exception + */ + public YaPage loadYaPage(final String query, final int pageNumber) + throws IOException, SAXException { + + final YaPage result = new 
YaPage(query, pageNumber); + final XMLReader xmlReader = XMLReaderFactory.createXMLReader(); + xmlReader.setContentHandler(new YaHandler(result)); + /* try-with-resources closes the HTTP response stream even when parsing fails */ + try (InputStream response = this.retrieveResponseViaGetRequest(query, pageNumber)) { + xmlReader.parse(new InputSource(response)); + } + + return result; + } + + +} diff --git a/src/main/java/ru/bvn13/jircbot/bot/JircBot.java b/src/main/java/ru/bvn13/jircbot/bot/JircBot.java index 3dcf962..2eeb86f 100644 --- a/src/main/java/ru/bvn13/jircbot/bot/JircBot.java +++ b/src/main/java/ru/bvn13/jircbot/bot/JircBot.java @@ -10,9 +10,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; +import ru.bvn13.jircbot.Services.YandexSearchService; import ru.bvn13.jircbot.config.JircBotConfiguration; import ru.bvn13.jircbot.listeners.*; +import javax.annotation.PostConstruct; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -30,13 +32,18 @@ public class JircBot extends ListenerAdapter { private Map bots = new HashMap<>(); + @Autowired + private YandexSearchService yandexSearchService; + + @Autowired public JircBot(JircBotConfiguration config) { this.config = config; - this.start(); + /* start() is not called here any more: it runs via @PostConstruct, after yandexSearchService has been injected */ } + @PostConstruct public void start() { //Setup this bot @@ -59,6 +66,7 @@ public class JircBot extends ListenerAdapter { //.addListener(new GoogleSearchListener(this.config)) .addListener(new UrlRetrieverListener()) .addListener(new RegexCheckerListener()) + .addListener(new YandexSearchListener(this.config, this.yandexSearchService)) .setServers(servers) .setAutoReconnect(true) //.addAutoJoinChannel(c.getChannelName()) //Join the official #pircbotx channel diff --git a/src/main/java/ru/bvn13/jircbot/config/JircBotConfiguration.java b/src/main/java/ru/bvn13/jircbot/config/JircBotConfiguration.java index c0f6d99..3817cc7 100644 --- a/src/main/java/ru/bvn13/jircbot/config/JircBotConfiguration.java +++ 
b/src/main/java/ru/bvn13/jircbot/config/JircBotConfiguration.java @@ -10,10 +10,8 @@ import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Configuration; import ru.bvn13.jircbot.listeners.GoogleDoodleListener; -import ru.bvn13.jircbot.model.Config; -import ru.bvn13.jircbot.model.GoogleDoodleSettings; -import ru.bvn13.jircbot.model.GoogleSearchSettings; -import ru.bvn13.jircbot.model.ListenerSettings; +import ru.bvn13.jircbot.listeners.YandexSearchListener; +import ru.bvn13.jircbot.model.*; import javax.annotation.PostConstruct; import java.io.FileNotFoundException; @@ -36,6 +34,7 @@ public class JircBotConfiguration { public static final String KEY_GOOGLE_DOODLE = "google-doodle"; public static final String KEY_GOOGLE_SEARCH = "google-search"; + public static final String KEY_YANDEX_SEARCH = "yandex-search"; @Getter private Map listenersSettings = new HashMap<>(); @@ -65,6 +64,7 @@ public class JircBotConfiguration { JSONObject lstnrSettings = (JSONObject) jsonObject.get("settings"); this.listenersSettings.put(KEY_GOOGLE_DOODLE, this.readGoogleDoodleSettins(lstnrSettings)); this.listenersSettings.put(KEY_GOOGLE_SEARCH, this.readGoogleSearchSettings(lstnrSettings)); + this.listenersSettings.put(KEY_YANDEX_SEARCH, this.readYandexSearchSettings(lstnrSettings)); } catch (FileNotFoundException e) { e.printStackTrace(); @@ -104,9 +104,9 @@ public class JircBotConfiguration { return config; } - private ListenerSettings readGoogleDoodleSettins(JSONObject settings) { + private ListenerSettings readGoogleDoodleSettins(JSONObject data) { GoogleDoodleSettings sets = new GoogleDoodleSettings(); - JSONObject googleSets = (JSONObject) settings.get("doodle"); + JSONObject googleSets = (JSONObject) data.get("doodle"); sets.setMainUrl((String) googleSets.get("mainUrl")); sets.setCheckUrl((String) googleSets.get("checkUrl")); sets.setLinkSelector((String) googleSets.get("linkSelector")); @@ -115,11 
+115,20 @@ public class JircBotConfiguration { return sets; } - private ListenerSettings readGoogleSearchSettings(JSONObject settings) { + private ListenerSettings readGoogleSearchSettings(JSONObject data) { GoogleSearchSettings sets = new GoogleSearchSettings(); - JSONObject searchSets = (JSONObject) settings.get("search"); + JSONObject searchSets = (JSONObject) data.get("search"); sets.setUuid((String) searchSets.get("uuid")); return sets; } + private ListenerSettings readYandexSearchSettings(JSONObject data) { + YandexSearchSettings sets = new YandexSearchSettings(); + JSONObject searchSets = (JSONObject) data.get("yaSearch"); + sets.setUrl((String) searchSets.get("url")); + sets.setUser((String) searchSets.get("user")); + sets.setKey((String) searchSets.get("key")); + return sets; + } + } diff --git a/src/main/java/ru/bvn13/jircbot/listeners/YandexSearchListener.java b/src/main/java/ru/bvn13/jircbot/listeners/YandexSearchListener.java new file mode 100644 index 0000000..014b0d8 --- /dev/null +++ b/src/main/java/ru/bvn13/jircbot/listeners/YandexSearchListener.java @@ -0,0 +1,62 @@ +package ru.bvn13.jircbot.listeners; + +import org.pircbotx.hooks.ListenerAdapter; +import org.pircbotx.hooks.types.GenericMessageEvent; +import ru.bvn13.jircbot.Services.YandexSearchService; +import ru.bvn13.jircbot.config.JircBotConfiguration; +import ru.bvn13.jircbot.model.YandexSearchSettings; + +import static ru.bvn13.jircbot.config.JircBotConfiguration.KEY_YANDEX_SEARCH; + +public class YandexSearchListener extends ListenerAdapter { + + private static final String COMMAND = "?search "; + + + private YandexSearchSettings config; + + private YandexSearchService yandexSearchService; + + public YandexSearchListener(JircBotConfiguration config, YandexSearchService yandexSearchService) { + this.config = (YandexSearchSettings) config.getListenersSettings().get(KEY_YANDEX_SEARCH); + this.yandexSearchService = yandexSearchService; + this.yandexSearchService.setKey(this.config.getKey()); + 
this.yandexSearchService.setUser(this.config.getUser()); + this.yandexSearchService.setUrl(this.config.getUrl()); + } + + + @Override + public void onGenericMessage(final GenericMessageEvent event) throws Exception { + + if (event.getUser().getUserId().equals(event.getBot().getUserBot().getUserId())) { + return; + } + + if (!event.getMessage().startsWith(COMMAND)) { + return; + } + + String message = event.getMessage().substring(COMMAND.length()).trim(); + + try { + final YandexSearchService.YaPage result = this.yandexSearchService.loadYaPage(message, 0); + int i = 0; + for (YandexSearchService.YaItem item : result.getYaItems()) { + if (i++ == 0) { + event.respond(String.format("%s - %s (%s)", item.getUrl(), item.getTitle(), item.getDescription())); + //event.respond("Next entries were sended privately."); + } else { + event.respondPrivateMessage(String.format("%d. %s - %s (%s)", (i - 1), item.getUrl(), item.getTitle(), item.getDescription())); + } + } + if (i == 0) { + event.respond("Not found"); + } + } catch (Exception exp) { + exp.printStackTrace(); + event.respond("ERROR has been occurred. Try again later."); + } + } + +} diff --git a/src/main/java/ru/bvn13/jircbot/model/YandexSearchSettings.java b/src/main/java/ru/bvn13/jircbot/model/YandexSearchSettings.java new file mode 100644 index 0000000..5296451 --- /dev/null +++ b/src/main/java/ru/bvn13/jircbot/model/YandexSearchSettings.java @@ -0,0 +1,14 @@ +package ru.bvn13.jircbot.model; + +import lombok.Data; + +@Data +public class YandexSearchSettings implements ListenerSettings { + + private String url; + + private String user; + + private String key; + +}