#7 fixed, fixed title retrieving from URL

pull/11/head
bvn13 2019-10-05 17:21:41 +03:00
parent adeaff4a01
commit 6cbf281698
9 changed files with 173 additions and 88 deletions

View File

@ -6,7 +6,7 @@
<groupId>ru.bvn13</groupId>
<artifactId>jircbot</artifactId>
<version>2.2.0</version>
<version>2.2.1</version>
<packaging>jar</packaging>
<parent>
@ -18,7 +18,7 @@
<properties>
<bot.version>2.2.0</bot.version>
<bot.version>2.2.1</bot.version>
<java.version>1.8</java.version>

View File

@ -14,6 +14,7 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import ru.bvn13.jircbot.config.JircBotConfiguration;
import ru.bvn13.jircbot.database.services.ChannelSettingsService;
import ru.bvn13.jircbot.listeners.*;
import ru.bvn13.jircbot.listeners.advices.AdviceListener;
import ru.bvn13.jircbot.listeners.calculator.CalculatorListener;
@ -43,14 +44,16 @@ public class JircBot extends ListenerAdapter {
}
private JircBotConfiguration config;
private ChannelSettingsService channelSettingsService;
private Map<String, PircBotX> bots = new HashMap<>();
@Autowired
public JircBot(JircBotConfiguration config) {
public JircBot(JircBotConfiguration config, ChannelSettingsService channelSettingsService) {
this.config = config;
this.channelSettingsService = channelSettingsService;
}
@ -137,7 +140,7 @@ public class JircBot extends ListenerAdapter {
.setServers(servers)
.setAutoReconnect(true)
.addAutoJoinChannels(c.getChannelsNames());
.addAutoJoinChannels(channelSettingsService.getListeningChannels(c.getServer(), c.getChannelsNames()));
if (c.getBotPassword() != null && !c.getBotPassword().isEmpty()) {
confBuilder.setNickservPassword(c.getBotPassword());

View File

@ -40,6 +40,9 @@ public class ChannelSettings extends BaseModel {
@Column(nullable = false)
private Boolean autoRejoinEnabled = false;
@Column(nullable = false, columnDefinition = "Boolean DEFAULT false")
private Boolean joinOnStart = false;
@Column(nullable = false)
private Boolean linkPreviewEnabled = false;

View File

@ -1,11 +1,20 @@
package ru.bvn13.jircbot.database.repositories;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
import ru.bvn13.jircbot.database.entities.ChannelSettings;
import java.util.List;
/**
* Spring Data JPA repository for {@link ChannelSettings} entities (primary key type {@code Long}).
*
* Created by bvn13 on 01.02.2018.
*/
public interface ChannelSettingsRepository extends JpaRepository<ChannelSettings, Long> {
// Looks up the settings of one channel on one server.
// NOTE(review): no @Query here - presumably resolved from the method name by Spring Data; confirm.
ChannelSettings getFirstByServerHostAndChannelName(String serverHost, String channelName);
// Selects every ChannelSettings row of the given server whose joinOnStart flag is true.
@Query(value = "select S from ChannelSettings S where S.joinOnStart = true and S.serverHost = :serverHost")
List<ChannelSettings> getAllChannelsToAutoJoinByServerHost(@Param("serverHost") String serverHost);
}

View File

@ -5,6 +5,11 @@ import org.springframework.stereotype.Service;
import ru.bvn13.jircbot.database.entities.ChannelSettings;
import ru.bvn13.jircbot.database.repositories.ChannelSettingsRepository;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
/**
* Created by bvn13 on 01.02.2018.
*/
@ -38,4 +43,14 @@ public class ChannelSettingsService {
channelSettingsRepository.save(settings);
}
/**
 * Resolves the set of channels to join on the given server.
 *
 * <p>Channels stored with the join-on-start flag take precedence; the
 * supplied defaults are used only when no such channel is stored.
 *
 * @param serverHost      host of the IRC server the channels belong to
 * @param defaultChannels fallback channel names, may be {@code null}
 * @return the stored join-on-start channel names, or the de-duplicated
 *         defaults when none are stored (empty when there are no defaults either)
 */
public Set<String> getListeningChannels(String serverHost, List<String> defaultChannels) {
    Set<String> stored = channelSettingsRepository.getAllChannelsToAutoJoinByServerHost(serverHost).stream()
            .map(ChannelSettings::getChannelName)
            .collect(Collectors.toSet());
    if (!stored.isEmpty() || defaultChannels == null) {
        return stored;
    }
    // Collectors.toSet() gives no mutability guarantee, so build the fallback
    // as a fresh set instead of calling addAll() on the collected one.
    return defaultChannels.stream().collect(Collectors.toSet());
}
}

View File

@ -92,6 +92,7 @@ public class AdminListener extends ImprovedListenerAdapter implements Descriptio
.command("set")
.description("set ON|OFF any of bot opportunity for channel\n\n"+
"Opportunities: \n\n"+
"login | autologin | join-on-start - auto login mode on startup\n"+
"autorejoin | auto-rejoin - auto rejoin channel on kicking\n"+
"bash | bashorg - bach.org quoting\n"+
"deferredmessages | deferred-messages | tell - saving and delivering deferred messages\n"+
@ -380,6 +381,10 @@ public class AdminListener extends ImprovedListenerAdapter implements Descriptio
case "quiz":
settings.setQuizEnabled(mode);
break;
case "join-on-start":
case "login":
case "autologin":
    settings.setJoinOnStart(mode);
    // Without this break the case falls through to default and throws
    // "Setting ... not exist" right after the flag has been applied.
    break;
default:
throw new RuntimeException("Setting " + set + " not exist");
}

View File

@ -10,6 +10,7 @@ import ru.bvn13.jircbot.documentation.DescriptionProvided;
import ru.bvn13.jircbot.documentation.DocumentationProvider;
import ru.bvn13.jircbot.documentation.ListenerDescription;
import ru.bvn13.jircbot.services.InternetAccessor;
import ru.bvn13.jircbot.utilities.WebTitleExtractor;
import java.io.*;
import java.util.ArrayList;
@ -25,8 +26,6 @@ import static ru.bvn13.jircbot.documentation.ListenerDescription.CommandDescript
@Component
public class LinkPreviewListener extends ImprovedListenerAdapter implements DescriptionProvided {
private InternetAccessor internetAccessor;
private static final Pattern REGEX = Pattern.compile("(?i)(?:(?:https?|ftp)://)(?:\\S+(?::\\S*)?@)?(?:(?!(?:10|127)(?:\\.\\d{1,3}){3})(?!(?:169\\.254|192\\.168)(?:\\.\\d{1,3}){2})(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,}))\\.?)(?::\\d{2,5})?(?:[/?#]\\S*)?");
private ChannelSettingsService channelSettingsService;
@ -60,10 +59,11 @@ public class LinkPreviewListener extends ImprovedListenerAdapter implements Desc
List<String> links = findLink(event.getMessage());
for (String link : links) {
String info = parseLink(link);
if (!info.isEmpty()) {
//String info = parseLink(link);
String title = WebTitleExtractor.getPageTitle(link);
if (title != null && !title.isEmpty()) {
//event.respond(info);
event.getChannel().send().message(info);
event.getChannel().send().message("TITLE: "+title);
}
};
@ -79,84 +79,6 @@ public class LinkPreviewListener extends ImprovedListenerAdapter implements Desc
return links;
}
/**
 * Downloads the page behind {@code link} and builds a preview line from its title.
 *
 * @param link URL of the page to preview
 * @return {@code "Title: <text>"}, or an empty string when the content is
 *         missing or contains no complete {@code <title>...</title>} pair
 * @throws Exception whatever the internet accessor throws while fetching
 */
private String parseLink(String link) throws Exception {
    final String openTag = "<title>";
    final String closeTag = "</title>";

    String content = internetAccessor.retrieveContentByLink(link);
    if (content == null) {
        return "";
    }
    int start = content.indexOf(openTag);
    if (start == -1) {
        // No title: an empty result instead of a StringIndexOutOfBoundsException.
        return "";
    }
    start += openTag.length();
    int end = content.indexOf(closeTag, start);
    if (end == -1) {
        return "";
    }
    return "Title: " + content.substring(start, end);
}
/**
 * Re-interprets the UTF-8 bytes of {@code title} as text in {@code encoding},
 * streaming the conversion through buffered reader/writer pairs.
 *
 * <p>Both ends of the pipeline use an explicit UTF-8 charset so the result
 * does not depend on the platform default, matching {@code decodeTitle}.
 *
 * @param title    text to re-decode
 * @param encoding name of the charset used to decode the title bytes
 * @return the re-decoded text
 * @throws IOException if {@code encoding} is not supported or the copy fails
 */
public String decodeTitle_buffered(String title, String encoding) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (Reader reader = new BufferedReader(new InputStreamReader(
                 new ByteArrayInputStream(title.getBytes("UTF-8")), encoding));
         Writer writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"))) {
        char[] buffer = new char[4096];
        int len;
        while ((len = reader.read(buffer)) != -1) {
            writer.write(buffer, 0, len);
        }
    } // closing the writer flushes the remaining characters into 'out'
    return out.toString("UTF-8");
}
/**
 * Encodes {@code title} as UTF-8 bytes and decodes those bytes with {@code encoding}.
 *
 * @param title    text to re-decode
 * @param encoding name of the charset used for decoding
 * @return the re-decoded text
 * @throws UnsupportedEncodingException if {@code encoding} is not supported
 */
String decodeTitle(String title, String encoding) throws UnsupportedEncodingException {
    final byte[] utf8Bytes = title.getBytes("UTF-8");
    final String decoded = new String(utf8Bytes, encoding);
    return decoded;
}
/**
 * Extracts the charset name from a Content-Type header value.
 *
 * @param contentType raw header value, e.g. {@code "text/html; charset=UTF-8"}; may be {@code null}
 * @return the lower-cased charset name without quotes, or {@code null} when the
 *         value is {@code null}, has no {@code charset=} parameter, or the parameter is empty
 */
public String getCharsetFromHeaders(String contentType){
    final String marker = "charset=";
    if (contentType == null) {
        return null;
    }
    String normalized = contentType.toLowerCase().trim();
    int start = normalized.indexOf(marker);
    if (start == -1) {
        return null;
    }
    // Take only the charset parameter: other parameters may contain '=' too,
    // and the value may be missing entirely ("charset=").
    String charset = normalized.substring(start + marker.length());
    int end = charset.indexOf(';');
    if (end != -1) {
        charset = charset.substring(0, end);
    }
    charset = charset.replace("\"", "").replace("'", "").trim();
    return charset.isEmpty() ? null : charset;
}
/** Matches a {@code charset=} declaration inside the (lower-cased) document head. */
private static final Pattern BODY_CHARSET_PATTERN = Pattern.compile("charset=([\"\'a-z0-9A-Z-]+)");

/**
 * Looks for a charset declaration in the head section of an HTML document.
 *
 * @param body HTML document text, may be {@code null}
 * @return the lower-cased charset name with quotes removed, or {@code null} when
 *         the body is {@code null}, has no {@code </head>} tag, or declares no charset
 */
public static String getCharsetFromBody(String body) {
    if (body == null) {
        return null;
    }
    // Search and cut on the same string; trimming only one of them shifts the
    // offset and truncates the head when the document starts with whitespace.
    String lowered = body.toLowerCase();
    int headEnd = lowered.indexOf("</head>");
    if (headEnd == -1) {
        // no head section, nothing to inspect
        return null;
    }
    Matcher m = BODY_CHARSET_PATTERN.matcher(lowered.substring(0, headEnd));
    if (!m.find()) {
        return null;
    }
    return m.group(1).replaceAll("[\"']", "");
}
/**
 * Setter-injection point: stores the {@link InternetAccessor} in this listener's field.
 *
 * @param internetAccessor accessor instance to keep
 */
@Autowired
public void setInternetAccessor(InternetAccessor internetAccessor) {
this.internetAccessor = internetAccessor;
}
@Autowired
public void setChannelSettingsService(ChannelSettingsService channelSettingsService) {
this.channelSettingsService = channelSettingsService;

View File

@ -0,0 +1,128 @@
package ru.bvn13.jircbot.utilities;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts the {@code <title>} text of an HTML page referenced by a URL.
 *
 * <p>Only the beginning of the response body is read, so a title that appears
 * after the read limit is not found.
 *
 * @author bvn13
 * @since 05.10.2019
 */
public class WebTitleExtractor {

    /* CASE_INSENSITIVE accounts for sites that use uppercase title tags.
     * DOTALL accounts for sites that have line feeds in the title text.
     * The quantifier is reluctant so the match stops at the FIRST closing tag;
     * a greedy one would swallow everything up to the last </title> in the
     * buffer (e.g. <title> elements of inline SVG images). */
    private static final Pattern TITLE_TAG =
            Pattern.compile("<title[^>]*>(.*?)</title>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /** Upper bound (in characters, rounded up to a whole buffer) of body text inspected. */
    private static final int MAX_READ_CHARS = 10 * 1024;

    /** Size of the intermediate read buffer, in characters. */
    private static final int BUFFER_SIZE = 1024;

    /** Timeouts keep one unresponsive host from blocking the caller forever. */
    private static final int CONNECT_TIMEOUT_MS = 10000;
    private static final int READ_TIMEOUT_MS = 10000;

    /**
     * Fetches the page and returns its title.
     *
     * @param url the HTML page
     * @return title text with whitespace runs and angle brackets collapsed to single
     *         spaces; {@code null} if the document isn't HTML or no title tag is
     *         found within the inspected part of the body
     * @throws IOException if the URL is malformed, or connecting/reading fails or times out
     */
    public static String getPageTitle(String url) throws IOException {
        URLConnection conn = new URL(url).openConnection();
        conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
        conn.setReadTimeout(READ_TIMEOUT_MS);

        // A missing Content-Type header is tolerated; an explicit non-HTML one is not.
        ContentType contentType = getContentTypeHeader(conn);
        if (contentType != null && !contentType.isHtml()) {
            return null;
        }

        // determine the charset, or use the platform default
        Charset charset = getCharset(contentType);
        if (charset == null) {
            charset = Charset.defaultCharset();
        }

        StringBuilder content = new StringBuilder();
        // try-with-resources closes the reader (and the stream) on every path
        try (InputStream in = conn.getInputStream();
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset))) {
            char[] buf = new char[BUFFER_SIZE];
            int n;
            // read until EOF or until the inspection limit is reached
            while (content.length() < MAX_READ_CHARS && (n = reader.read(buf, 0, buf.length)) != -1) {
                content.append(buf, 0, n);
            }
        }

        Matcher matcher = TITLE_TAG.matcher(content);
        if (!matcher.find()) {
            return null;
        }
        /* replace any occurrences of whitespace (which may include line feeds
         * and other uglies) as well as HTML brackets with a space */
        return matcher.group(1).replaceAll("[\\s\\<>]+", " ").trim();
    }

    /**
     * Loops through the response headers until Content-Type is found.
     * Header names are compared case-insensitively.
     *
     * @param conn connection whose response headers are inspected
     * @return ContentType object representing the value of the Content-Type
     *         header, or {@code null} if the response has no such header
     */
    private static ContentType getContentTypeHeader(URLConnection conn) {
        int i = 0;
        boolean moreHeaders;
        do {
            String headerName = conn.getHeaderFieldKey(i);
            String headerValue = conn.getHeaderField(i);
            if (headerValue != null && "Content-Type".equalsIgnoreCase(headerName)) {
                return new ContentType(headerValue);
            }
            i++;
            // an entry may have a null key but a value, so stop only when both are null
            moreHeaders = headerName != null || headerValue != null;
        } while (moreHeaders);
        return null;
    }

    /**
     * @param contentType parsed header, may be {@code null}
     * @return the declared charset if it is present and supported, otherwise {@code null}
     */
    private static Charset getCharset(ContentType contentType) {
        if (contentType == null || contentType.charsetName == null) {
            return null;
        }
        try {
            return Charset.isSupported(contentType.charsetName)
                    ? Charset.forName(contentType.charsetName)
                    : null;
        } catch (IllegalArgumentException e) {
            // syntactically illegal charset name sent by the server - treat as unspecified
            return null;
        }
    }

    /**
     * Class holds the content type and charset (if present)
     */
    private static final class ContentType {
        // optional whitespace and an optional opening quote are allowed around the value
        private static final Pattern CHARSET_HEADER =
                Pattern.compile("charset\\s*=\\s*\"?([-_a-zA-Z0-9]+)", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

        private final String contentType;
        private final String charsetName;

        private ContentType(String headerValue) {
            if (headerValue == null) {
                throw new IllegalArgumentException("ContentType must be constructed with a not-null headerValue");
            }
            int n = headerValue.indexOf(";");
            if (n != -1) {
                contentType = headerValue.substring(0, n).trim();
                Matcher matcher = CHARSET_HEADER.matcher(headerValue);
                charsetName = matcher.find() ? matcher.group(1) : null;
            } else {
                contentType = headerValue.trim();
                charsetName = null;
            }
        }

        /** Media types are case-insensitive, so "Text/HTML" counts as HTML too. */
        private boolean isHtml() {
            return "text/html".equalsIgnoreCase(contentType);
        }
    }
}

View File

@ -1,5 +1,5 @@
jircbot.version=2.2.0
jircbot.version=2.2.1
jircbot.config=config.json