From 6cbf28169800d4f38e4f0a9a029e6f5f6a588d10 Mon Sep 17 00:00:00 2001 From: bvn13 Date: Sat, 5 Oct 2019 17:21:41 +0300 Subject: [PATCH] #7 fixed, fixed title retrieving from URL --- pom.xml | 4 +- .../java/ru/bvn13/jircbot/bot/JircBot.java | 7 +- .../database/entities/ChannelSettings.java | 3 + .../ChannelSettingsRepository.java | 9 ++ .../services/ChannelSettingsService.java | 15 ++ .../jircbot/listeners/AdminListener.java | 5 + .../listeners/LinkPreviewListener.java | 88 +----------- .../jircbot/utilities/WebTitleExtractor.java | 128 ++++++++++++++++++ src/main/resources/application.properties | 2 +- 9 files changed, 173 insertions(+), 88 deletions(-) create mode 100644 src/main/java/ru/bvn13/jircbot/utilities/WebTitleExtractor.java diff --git a/pom.xml b/pom.xml index 4dbbcf5..9d5b9d9 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ ru.bvn13 jircbot - 2.2.0 + 2.2.1 jar @@ -18,7 +18,7 @@ - 2.2.0 + 2.2.1 1.8 diff --git a/src/main/java/ru/bvn13/jircbot/bot/JircBot.java b/src/main/java/ru/bvn13/jircbot/bot/JircBot.java index e80eab3..f738937 100644 --- a/src/main/java/ru/bvn13/jircbot/bot/JircBot.java +++ b/src/main/java/ru/bvn13/jircbot/bot/JircBot.java @@ -14,6 +14,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; import ru.bvn13.jircbot.config.JircBotConfiguration; +import ru.bvn13.jircbot.database.services.ChannelSettingsService; import ru.bvn13.jircbot.listeners.*; import ru.bvn13.jircbot.listeners.advices.AdviceListener; import ru.bvn13.jircbot.listeners.calculator.CalculatorListener; @@ -43,14 +44,16 @@ public class JircBot extends ListenerAdapter { } private JircBotConfiguration config; + private ChannelSettingsService channelSettingsService; private Map bots = new HashMap<>(); @Autowired - public JircBot(JircBotConfiguration config) { + public JircBot(JircBotConfiguration config, ChannelSettingsService channelSettingsService) { this.config = config; + this.channelSettingsService = channelSettingsService; } @@ -137,7 +140,7 @@ public class JircBot extends ListenerAdapter { .setServers(servers) .setAutoReconnect(true) - .addAutoJoinChannels(c.getChannelsNames()); + .addAutoJoinChannels(channelSettingsService.getListeningChannels(c.getServer(), c.getChannelsNames())); if (c.getBotPassword() != null && !c.getBotPassword().isEmpty()) { confBuilder.setNickservPassword(c.getBotPassword()); diff --git a/src/main/java/ru/bvn13/jircbot/database/entities/ChannelSettings.java b/src/main/java/ru/bvn13/jircbot/database/entities/ChannelSettings.java index d36fdfa..524f2c3 100644 --- a/src/main/java/ru/bvn13/jircbot/database/entities/ChannelSettings.java +++ b/src/main/java/ru/bvn13/jircbot/database/entities/ChannelSettings.java @@ -40,6 +40,9 @@ public class ChannelSettings extends BaseModel { @Column(nullable = false) private Boolean autoRejoinEnabled = false; + @Column(nullable = false, columnDefinition = "Boolean DEFAULT false") + private Boolean joinOnStart = false; + @Column(nullable = false) private Boolean linkPreviewEnabled = false; diff --git a/src/main/java/ru/bvn13/jircbot/database/repositories/ChannelSettingsRepository.java b/src/main/java/ru/bvn13/jircbot/database/repositories/ChannelSettingsRepository.java index 9c1c755..b441934 100644 --- a/src/main/java/ru/bvn13/jircbot/database/repositories/ChannelSettingsRepository.java +++ b/src/main/java/ru/bvn13/jircbot/database/repositories/ChannelSettingsRepository.java @@ -1,11 +1,20 @@ package ru.bvn13.jircbot.database.repositories; import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; import ru.bvn13.jircbot.database.entities.ChannelSettings; +import java.util.List; + /** * Created by bvn13 on 01.02.2018. */ public interface ChannelSettingsRepository extends JpaRepository { + ChannelSettings getFirstByServerHostAndChannelName(String serverHost, String channelName); + + @Query(value = "select S from ChannelSettings S where S.joinOnStart = true and S.serverHost = :serverHost") + List getAllChannelsToAutoJoinByServerHost(@Param("serverHost") String serverHost); + } diff --git a/src/main/java/ru/bvn13/jircbot/database/services/ChannelSettingsService.java b/src/main/java/ru/bvn13/jircbot/database/services/ChannelSettingsService.java index bf1b4e3..ff5e9b5 100644 --- a/src/main/java/ru/bvn13/jircbot/database/services/ChannelSettingsService.java +++ b/src/main/java/ru/bvn13/jircbot/database/services/ChannelSettingsService.java @@ -5,6 +5,11 @@ import org.springframework.stereotype.Service; import ru.bvn13.jircbot.database.entities.ChannelSettings; import ru.bvn13.jircbot.database.repositories.ChannelSettingsRepository; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + /** * Created by bvn13 on 01.02.2018. */ @@ -38,4 +43,14 @@ public class ChannelSettingsService { channelSettingsRepository.save(settings); } + public Set getListeningChannels(String serverHost, List defaultChannels) { + Set channels = channelSettingsRepository.getAllChannelsToAutoJoinByServerHost(serverHost).stream() + .map(ChannelSettings::getChannelName) + .collect(Collectors.toSet()); + if (channels.isEmpty()) { + channels.addAll(defaultChannels); + } + return channels; + } + } diff --git a/src/main/java/ru/bvn13/jircbot/listeners/AdminListener.java b/src/main/java/ru/bvn13/jircbot/listeners/AdminListener.java index 50e3432..82fa8a0 100644 --- a/src/main/java/ru/bvn13/jircbot/listeners/AdminListener.java +++ b/src/main/java/ru/bvn13/jircbot/listeners/AdminListener.java @@ -92,6 +92,7 @@ public class AdminListener extends ImprovedListenerAdapter implements Descriptio .command("set") .description("set ON|OFF any of bot opportunity for channel\n\n"+ "Opportunities: \n\n"+ + "login | autologin | join-on-start - auto login mode on startup\n"+ "autorejoin | auto-rejoin - auto rejoin channel on kicking\n"+ "bash | bashorg - bach.org quoting\n"+ "deferredmessages | deferred-messages | tell - saving and delivering deferred messages\n"+ @@ -380,6 +381,10 @@ public class AdminListener extends ImprovedListenerAdapter implements Descriptio case "quiz": settings.setQuizEnabled(mode); break; + case "join-on-start": + case "login": + case "autologin": + settings.setJoinOnStart(mode); default: throw new RuntimeException("Setting " + set + " not exist"); } diff --git a/src/main/java/ru/bvn13/jircbot/listeners/LinkPreviewListener.java b/src/main/java/ru/bvn13/jircbot/listeners/LinkPreviewListener.java index 5416fe5..eaf729b 100644 --- a/src/main/java/ru/bvn13/jircbot/listeners/LinkPreviewListener.java +++ b/src/main/java/ru/bvn13/jircbot/listeners/LinkPreviewListener.java @@ -10,6 +10,7 @@ import ru.bvn13.jircbot.documentation.DescriptionProvided; import ru.bvn13.jircbot.documentation.DocumentationProvider; import ru.bvn13.jircbot.documentation.ListenerDescription; import ru.bvn13.jircbot.services.InternetAccessor; +import ru.bvn13.jircbot.utilities.WebTitleExtractor; import java.io.*; import java.util.ArrayList; @@ -25,8 +26,6 @@ import static ru.bvn13.jircbot.documentation.ListenerDescription.CommandDescript @Component public class LinkPreviewListener extends ImprovedListenerAdapter implements DescriptionProvided { - private InternetAccessor internetAccessor; - private static final Pattern REGEX = Pattern.compile("(?i)(?:(?:https?|ftp)://)(?:\\S+(?::\\S*)?@)?(?:(?!(?:10|127)(?:\\.\\d{1,3}){3})(?!(?:169\\.254|192\\.168)(?:\\.\\d{1,3}){2})(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,}))\\.?)(?::\\d{2,5})?(?:[/?#]\\S*)?"); private ChannelSettingsService channelSettingsService; @@ -60,10 +59,11 @@ public class LinkPreviewListener extends ImprovedListenerAdapter implements Desc List links = findLink(event.getMessage()); for (String link : links) { - String info = parseLink(link); - if (!info.isEmpty()) { + //String info = parseLink(link); + String title = WebTitleExtractor.getPageTitle(link); + if (title != null && !title.isEmpty()) { //event.respond(info); - event.getChannel().send().message(info); + event.getChannel().send().message("TITLE: "+title); } }; @@ -79,84 +79,6 @@ public class LinkPreviewListener extends ImprovedListenerAdapter implements Desc return links; } - - private String parseLink(String link) throws Exception { - String content = internetAccessor.retrieveContentByLink(link); - - String encoding = null; //getCharsetFromHeaders(content.toString()); -// if (encoding == null) { -// encoding = getCharsetFromBody(content.toString()); -// } - - String title = ""; - -// if (encoding != null && !encoding.isEmpty()) { -// content = internetAccessor.retrieveContentByLinkWithEncoding(link, encoding); -// } - - title = content.substring(content.indexOf("") + 7, content.indexOf("")); - - return "Title: "+title; - } - - - public String decodeTitle_buffered(String title, String encoding) throws IOException { - ByteArrayOutputStream out = new ByteArrayOutputStream(); - - Reader r = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(title.getBytes()), encoding)); - Writer w = new BufferedWriter(new OutputStreamWriter(out, "UTF-8")); - - char[] buffer=new char[4096]; - int len; - while((len=r.read(buffer)) != -1) { - w.write(buffer, 0, len); - } - r.close(); - w.flush(); - w.close(); - - return out.toString(); - } - - String decodeTitle(String title, String encoding) throws UnsupportedEncodingException { - return new String(title.getBytes("UTF-8"), encoding); - } - - public String getCharsetFromHeaders(String contentType){ - if (contentType != null && contentType.toLowerCase().trim().contains("charset=")){ - String[] parts = contentType.toLowerCase().trim().split("="); - if (parts.length > 0) - return parts[1]; - } - return null; - } - - public static String getCharsetFromBody(String body) { - if (body != null) { - int headEnd = body.toLowerCase().trim().indexOf(""); - - // return null if there's no head tags - if (headEnd == -1) - return null; - - String body_head = body.toLowerCase().substring(0, headEnd); - - Pattern p = Pattern.compile("charset=([\"\'a-z0-9A-Z-]+)"); - Matcher m = p.matcher(body_head); - String str_match = ""; - if (m.find()) { - str_match = m.toMatchResult().group(1); - return str_match.replaceAll("[\"']", ""); - } - } - return null; - } - - @Autowired - public void setInternetAccessor(InternetAccessor internetAccessor) { - this.internetAccessor = internetAccessor; - } - @Autowired public void setChannelSettingsService(ChannelSettingsService channelSettingsService) { this.channelSettingsService = channelSettingsService; diff --git a/src/main/java/ru/bvn13/jircbot/utilities/WebTitleExtractor.java b/src/main/java/ru/bvn13/jircbot/utilities/WebTitleExtractor.java new file mode 100644 index 0000000..491fbec --- /dev/null +++ b/src/main/java/ru/bvn13/jircbot/utilities/WebTitleExtractor.java @@ -0,0 +1,128 @@ +package ru.bvn13.jircbot.utilities; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * @author bvn13 + * @since 05.10.2019 + */ +public class WebTitleExtractor { + /* the CASE_INSENSITIVE flag accounts for + * sites that use uppercase title tags. + * the DOTALL flag accounts for sites that have + * line feeds in the title text */ + private static final Pattern TITLE_TAG = + Pattern.compile("]*>(.*)", Pattern.CASE_INSENSITIVE|Pattern.DOTALL); + + private static final int MAX_READ_BYTES = 10*1024; + + /** + * @param url the HTML page + * @return title text (null if document isn't HTML or lacks a title tag) + * @throws IOException + */ + public static String getPageTitle(String url) throws IOException { + URL u = new URL(url); + URLConnection conn = u.openConnection(); + + // ContentType is an inner class defined below + ContentType contentType = getContentTypeHeader(conn); + if (contentType != null && !contentType.contentType.equals("text/html")) + return null; // don't continue if not HTML + else { + // determine the charset, or use the default + Charset charset = getCharset(contentType); + if (charset == null) + charset = Charset.defaultCharset(); + + // read the response body, using BufferedReader for performance + try (InputStream in = conn.getInputStream(); + BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset)); + ) { + + int n = 0, totalRead = 0; + char[] buf = new char[1024]; + StringBuilder content = new StringBuilder(); + + // read until EOF or first 8192 characters + while (totalRead < MAX_READ_BYTES && (n = reader.read(buf, 0, buf.length)) != -1) { + content.append(buf, 0, n); + totalRead += n; + } + reader.close(); + + // extract the title + Matcher matcher = TITLE_TAG.matcher(content); + if (matcher.find()) { + /* replace any occurrences of whitespace (which may + * include line feeds and other uglies) as well + * as HTML brackets with a space */ + return matcher.group(1).replaceAll("[\\s\\<>]+", " ").trim(); + } else + return null; + } + } + } + + /** + * Loops through response headers until Content-Type is found. + * @param conn + * @return ContentType object representing the value of + * the Content-Type header + */ + private static ContentType getContentTypeHeader(URLConnection conn) { + int i = 0; + boolean moreHeaders = true; + do { + String headerName = conn.getHeaderFieldKey(i); + String headerValue = conn.getHeaderField(i); + if (headerName != null && headerName.equals("Content-Type")) + return new ContentType(headerValue); + + i++; + moreHeaders = headerName != null || headerValue != null; + } + while (moreHeaders); + + return null; + } + + private static Charset getCharset(ContentType contentType) { + if (contentType != null && contentType.charsetName != null && Charset.isSupported(contentType.charsetName)) + return Charset.forName(contentType.charsetName); + else + return null; + } + + /** + * Class holds the content type and charset (if present) + */ + private static final class ContentType { + private static final Pattern CHARSET_HEADER = Pattern.compile("charset=([-_a-zA-Z0-9]+)", Pattern.CASE_INSENSITIVE|Pattern.DOTALL); + + private String contentType; + private String charsetName; + private ContentType(String headerValue) { + if (headerValue == null) + throw new IllegalArgumentException("ContentType must be constructed with a not-null headerValue"); + int n = headerValue.indexOf(";"); + if (n != -1) { + contentType = headerValue.substring(0, n); + Matcher matcher = CHARSET_HEADER.matcher(headerValue); + if (matcher.find()) + charsetName = matcher.group(1); + } + else + contentType = headerValue; + } + } +} diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index fc024ae..cf5671d 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -1,5 +1,5 @@ -jircbot.version=2.2.0 +jircbot.version=2.2.1 jircbot.config=config.json