mirror of https://github.com/bvn13/JIrcBot.git
#7 fixed, fixed title retrieving from URL
parent
adeaff4a01
commit
6cbf281698
4
pom.xml
4
pom.xml
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
<groupId>ru.bvn13</groupId>
|
<groupId>ru.bvn13</groupId>
|
||||||
<artifactId>jircbot</artifactId>
|
<artifactId>jircbot</artifactId>
|
||||||
<version>2.2.0</version>
|
<version>2.2.1</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<parent>
|
<parent>
|
||||||
|
@ -18,7 +18,7 @@
|
||||||
|
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<bot.version>2.2.0</bot.version>
|
<bot.version>2.2.1</bot.version>
|
||||||
|
|
||||||
<java.version>1.8</java.version>
|
<java.version>1.8</java.version>
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,7 @@ import org.springframework.beans.factory.annotation.Autowired;
|
||||||
import org.springframework.beans.factory.annotation.Value;
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
import ru.bvn13.jircbot.config.JircBotConfiguration;
|
import ru.bvn13.jircbot.config.JircBotConfiguration;
|
||||||
|
import ru.bvn13.jircbot.database.services.ChannelSettingsService;
|
||||||
import ru.bvn13.jircbot.listeners.*;
|
import ru.bvn13.jircbot.listeners.*;
|
||||||
import ru.bvn13.jircbot.listeners.advices.AdviceListener;
|
import ru.bvn13.jircbot.listeners.advices.AdviceListener;
|
||||||
import ru.bvn13.jircbot.listeners.calculator.CalculatorListener;
|
import ru.bvn13.jircbot.listeners.calculator.CalculatorListener;
|
||||||
|
@ -43,14 +44,16 @@ public class JircBot extends ListenerAdapter {
|
||||||
}
|
}
|
||||||
|
|
||||||
private JircBotConfiguration config;
|
private JircBotConfiguration config;
|
||||||
|
private ChannelSettingsService channelSettingsService;
|
||||||
|
|
||||||
private Map<String, PircBotX> bots = new HashMap<>();
|
private Map<String, PircBotX> bots = new HashMap<>();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Autowired
|
@Autowired
|
||||||
public JircBot(JircBotConfiguration config) {
|
public JircBot(JircBotConfiguration config, ChannelSettingsService channelSettingsService) {
|
||||||
this.config = config;
|
this.config = config;
|
||||||
|
this.channelSettingsService = channelSettingsService;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -137,7 +140,7 @@ public class JircBot extends ListenerAdapter {
|
||||||
|
|
||||||
.setServers(servers)
|
.setServers(servers)
|
||||||
.setAutoReconnect(true)
|
.setAutoReconnect(true)
|
||||||
.addAutoJoinChannels(c.getChannelsNames());
|
.addAutoJoinChannels(channelSettingsService.getListeningChannels(c.getServer(), c.getChannelsNames()));
|
||||||
|
|
||||||
if (c.getBotPassword() != null && !c.getBotPassword().isEmpty()) {
|
if (c.getBotPassword() != null && !c.getBotPassword().isEmpty()) {
|
||||||
confBuilder.setNickservPassword(c.getBotPassword());
|
confBuilder.setNickservPassword(c.getBotPassword());
|
||||||
|
|
|
@ -40,6 +40,9 @@ public class ChannelSettings extends BaseModel {
|
||||||
@Column(nullable = false)
|
@Column(nullable = false)
|
||||||
private Boolean autoRejoinEnabled = false;
|
private Boolean autoRejoinEnabled = false;
|
||||||
|
|
||||||
|
@Column(nullable = false, columnDefinition = "Boolean DEFAULT false")
|
||||||
|
private Boolean joinOnStart = false;
|
||||||
|
|
||||||
@Column(nullable = false)
|
@Column(nullable = false)
|
||||||
private Boolean linkPreviewEnabled = false;
|
private Boolean linkPreviewEnabled = false;
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,20 @@
|
||||||
package ru.bvn13.jircbot.database.repositories;
|
package ru.bvn13.jircbot.database.repositories;
|
||||||
|
|
||||||
import org.springframework.data.jpa.repository.JpaRepository;
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
|
import org.springframework.data.jpa.repository.Query;
|
||||||
|
import org.springframework.data.repository.query.Param;
|
||||||
import ru.bvn13.jircbot.database.entities.ChannelSettings;
|
import ru.bvn13.jircbot.database.entities.ChannelSettings;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by bvn13 on 01.02.2018.
|
* Created by bvn13 on 01.02.2018.
|
||||||
*/
|
*/
|
||||||
public interface ChannelSettingsRepository extends JpaRepository<ChannelSettings, Long> {
|
public interface ChannelSettingsRepository extends JpaRepository<ChannelSettings, Long> {
|
||||||
|
|
||||||
ChannelSettings getFirstByServerHostAndChannelName(String serverHost, String channelName);
|
ChannelSettings getFirstByServerHostAndChannelName(String serverHost, String channelName);
|
||||||
|
|
||||||
|
@Query(value = "select S from ChannelSettings S where S.joinOnStart = true and S.serverHost = :serverHost")
|
||||||
|
List<ChannelSettings> getAllChannelsToAutoJoinByServerHost(@Param("serverHost") String serverHost);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,11 @@ import org.springframework.stereotype.Service;
|
||||||
import ru.bvn13.jircbot.database.entities.ChannelSettings;
|
import ru.bvn13.jircbot.database.entities.ChannelSettings;
|
||||||
import ru.bvn13.jircbot.database.repositories.ChannelSettingsRepository;
|
import ru.bvn13.jircbot.database.repositories.ChannelSettingsRepository;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by bvn13 on 01.02.2018.
|
* Created by bvn13 on 01.02.2018.
|
||||||
*/
|
*/
|
||||||
|
@ -38,4 +43,14 @@ public class ChannelSettingsService {
|
||||||
channelSettingsRepository.save(settings);
|
channelSettingsRepository.save(settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Set<String> getListeningChannels(String serverHost, List<String> defaultChannels) {
|
||||||
|
Set<String> channels = channelSettingsRepository.getAllChannelsToAutoJoinByServerHost(serverHost).stream()
|
||||||
|
.map(ChannelSettings::getChannelName)
|
||||||
|
.collect(Collectors.toSet());
|
||||||
|
if (channels.isEmpty()) {
|
||||||
|
channels.addAll(defaultChannels);
|
||||||
|
}
|
||||||
|
return channels;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -92,6 +92,7 @@ public class AdminListener extends ImprovedListenerAdapter implements Descriptio
|
||||||
.command("set")
|
.command("set")
|
||||||
.description("set ON|OFF any of bot opportunity for channel\n\n"+
|
.description("set ON|OFF any of bot opportunity for channel\n\n"+
|
||||||
"Opportunities: \n\n"+
|
"Opportunities: \n\n"+
|
||||||
|
"login | autologin | join-on-start - auto login mode on startup\n"+
|
||||||
"autorejoin | auto-rejoin - auto rejoin channel on kicking\n"+
|
"autorejoin | auto-rejoin - auto rejoin channel on kicking\n"+
|
||||||
"bash | bashorg - bach.org quoting\n"+
|
"bash | bashorg - bach.org quoting\n"+
|
||||||
"deferredmessages | deferred-messages | tell - saving and delivering deferred messages\n"+
|
"deferredmessages | deferred-messages | tell - saving and delivering deferred messages\n"+
|
||||||
|
@ -380,6 +381,10 @@ public class AdminListener extends ImprovedListenerAdapter implements Descriptio
|
||||||
case "quiz":
|
case "quiz":
|
||||||
settings.setQuizEnabled(mode);
|
settings.setQuizEnabled(mode);
|
||||||
break;
|
break;
|
||||||
|
case "join-on-start":
case "login":
case "autologin":
    settings.setJoinOnStart(mode);
    // Without this break the case falls through into "default" and every
    // attempt to change the setting ends in "Setting ... not exist".
    break;
|
||||||
default:
|
default:
|
||||||
throw new RuntimeException("Setting " + set + " not exist");
|
throw new RuntimeException("Setting " + set + " not exist");
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,6 +10,7 @@ import ru.bvn13.jircbot.documentation.DescriptionProvided;
|
||||||
import ru.bvn13.jircbot.documentation.DocumentationProvider;
|
import ru.bvn13.jircbot.documentation.DocumentationProvider;
|
||||||
import ru.bvn13.jircbot.documentation.ListenerDescription;
|
import ru.bvn13.jircbot.documentation.ListenerDescription;
|
||||||
import ru.bvn13.jircbot.services.InternetAccessor;
|
import ru.bvn13.jircbot.services.InternetAccessor;
|
||||||
|
import ru.bvn13.jircbot.utilities.WebTitleExtractor;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -25,8 +26,6 @@ import static ru.bvn13.jircbot.documentation.ListenerDescription.CommandDescript
|
||||||
@Component
|
@Component
|
||||||
public class LinkPreviewListener extends ImprovedListenerAdapter implements DescriptionProvided {
|
public class LinkPreviewListener extends ImprovedListenerAdapter implements DescriptionProvided {
|
||||||
|
|
||||||
private InternetAccessor internetAccessor;
|
|
||||||
|
|
||||||
private static final Pattern REGEX = Pattern.compile("(?i)(?:(?:https?|ftp)://)(?:\\S+(?::\\S*)?@)?(?:(?!(?:10|127)(?:\\.\\d{1,3}){3})(?!(?:169\\.254|192\\.168)(?:\\.\\d{1,3}){2})(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,}))\\.?)(?::\\d{2,5})?(?:[/?#]\\S*)?");
|
private static final Pattern REGEX = Pattern.compile("(?i)(?:(?:https?|ftp)://)(?:\\S+(?::\\S*)?@)?(?:(?!(?:10|127)(?:\\.\\d{1,3}){3})(?!(?:169\\.254|192\\.168)(?:\\.\\d{1,3}){2})(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,}))\\.?)(?::\\d{2,5})?(?:[/?#]\\S*)?");
|
||||||
|
|
||||||
private ChannelSettingsService channelSettingsService;
|
private ChannelSettingsService channelSettingsService;
|
||||||
|
@ -60,10 +59,11 @@ public class LinkPreviewListener extends ImprovedListenerAdapter implements Desc
|
||||||
|
|
||||||
List<String> links = findLink(event.getMessage());
|
List<String> links = findLink(event.getMessage());
|
||||||
for (String link : links) {
|
for (String link : links) {
|
||||||
String info = parseLink(link);
|
//String info = parseLink(link);
|
||||||
if (!info.isEmpty()) {
|
String title = WebTitleExtractor.getPageTitle(link);
|
||||||
|
if (title != null && !title.isEmpty()) {
|
||||||
//event.respond(info);
|
//event.respond(info);
|
||||||
event.getChannel().send().message(info);
|
event.getChannel().send().message("TITLE: "+title);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -79,84 +79,6 @@ public class LinkPreviewListener extends ImprovedListenerAdapter implements Desc
|
||||||
return links;
|
return links;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private String parseLink(String link) throws Exception {
|
|
||||||
String content = internetAccessor.retrieveContentByLink(link);
|
|
||||||
|
|
||||||
String encoding = null; //getCharsetFromHeaders(content.toString());
|
|
||||||
// if (encoding == null) {
|
|
||||||
// encoding = getCharsetFromBody(content.toString());
|
|
||||||
// }
|
|
||||||
|
|
||||||
String title = "";
|
|
||||||
|
|
||||||
// if (encoding != null && !encoding.isEmpty()) {
|
|
||||||
// content = internetAccessor.retrieveContentByLinkWithEncoding(link, encoding);
|
|
||||||
// }
|
|
||||||
|
|
||||||
title = content.substring(content.indexOf("<title>") + 7, content.indexOf("</title>"));
|
|
||||||
|
|
||||||
return "Title: "+title;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public String decodeTitle_buffered(String title, String encoding) throws IOException {
|
|
||||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
|
||||||
|
|
||||||
Reader r = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(title.getBytes()), encoding));
|
|
||||||
Writer w = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
|
|
||||||
|
|
||||||
char[] buffer=new char[4096];
|
|
||||||
int len;
|
|
||||||
while((len=r.read(buffer)) != -1) {
|
|
||||||
w.write(buffer, 0, len);
|
|
||||||
}
|
|
||||||
r.close();
|
|
||||||
w.flush();
|
|
||||||
w.close();
|
|
||||||
|
|
||||||
return out.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
String decodeTitle(String title, String encoding) throws UnsupportedEncodingException {
|
|
||||||
return new String(title.getBytes("UTF-8"), encoding);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getCharsetFromHeaders(String contentType){
|
|
||||||
if (contentType != null && contentType.toLowerCase().trim().contains("charset=")){
|
|
||||||
String[] parts = contentType.toLowerCase().trim().split("=");
|
|
||||||
if (parts.length > 0)
|
|
||||||
return parts[1];
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String getCharsetFromBody(String body) {
|
|
||||||
if (body != null) {
|
|
||||||
int headEnd = body.toLowerCase().trim().indexOf("</head>");
|
|
||||||
|
|
||||||
// return null if there's no head tags
|
|
||||||
if (headEnd == -1)
|
|
||||||
return null;
|
|
||||||
|
|
||||||
String body_head = body.toLowerCase().substring(0, headEnd);
|
|
||||||
|
|
||||||
Pattern p = Pattern.compile("charset=([\"\'a-z0-9A-Z-]+)");
|
|
||||||
Matcher m = p.matcher(body_head);
|
|
||||||
String str_match = "";
|
|
||||||
if (m.find()) {
|
|
||||||
str_match = m.toMatchResult().group(1);
|
|
||||||
return str_match.replaceAll("[\"']", "");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Autowired
|
|
||||||
public void setInternetAccessor(InternetAccessor internetAccessor) {
|
|
||||||
this.internetAccessor = internetAccessor;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Autowired
|
@Autowired
|
||||||
public void setChannelSettingsService(ChannelSettingsService channelSettingsService) {
|
public void setChannelSettingsService(ChannelSettingsService channelSettingsService) {
|
||||||
this.channelSettingsService = channelSettingsService;
|
this.channelSettingsService = channelSettingsService;
|
||||||
|
|
|
@ -0,0 +1,128 @@
|
||||||
|
package ru.bvn13.jircbot.utilities;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.net.URLConnection;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
/**
 * Fetches a document by URL and extracts the text of its {@code <title>} element.
 *
 * @author bvn13
 * @since 05.10.2019
 */
public class WebTitleExtractor {

    /*
     * CASE_INSENSITIVE accounts for sites that use uppercase title tags.
     * DOTALL accounts for titles that contain line feeds.
     * The group is reluctant so the match stops at the FIRST closing tag; a greedy
     * group would run on to the last </title> in the document (e.g. one inside an
     * inline SVG) and return a chunk of markup instead of the title.
     */
    private static final Pattern TITLE_TAG =
            Pattern.compile("<title[^>]*>(.*?)</title>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /** Upper bound on the number of characters read from the response body. */
    private static final int MAX_READ_CHARS = 10 * 1024;

    /** Timeouts keep a stalled or very slow host from blocking the caller forever. */
    private static final int CONNECT_TIMEOUT_MS = 5_000;
    private static final int READ_TIMEOUT_MS = 10_000;

    private static final String HTML_MIME_TYPE = "text/html";

    /** Used when the response declares no usable charset; fixed so results do not depend on the host platform. */
    private static final Charset FALLBACK_CHARSET = Charset.forName("UTF-8");

    /**
     * Downloads the beginning of the document and returns its title.
     *
     * @param url the HTML page
     * @return title text with runs of whitespace and angle brackets collapsed to a single space;
     *         null if the document is declared as something other than HTML, or no complete
     *         title element occurs within the first {@value #MAX_READ_CHARS} characters
     *         (approximately, reading stops at the first buffer boundary past the limit)
     * @throws IOException if the URL is malformed, the connection fails or a timeout expires
     */
    public static String getPageTitle(String url) throws IOException {
        URLConnection conn = new URL(url).openConnection();
        // must be configured before the first header access, which opens the connection
        conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
        conn.setReadTimeout(READ_TIMEOUT_MS);

        ContentType contentType = getContentTypeHeader(conn);
        if (contentType != null && !HTML_MIME_TYPE.equalsIgnoreCase(contentType.contentType)) {
            return null; // don't continue if not HTML
        }

        // determine the charset, or use the fallback
        Charset charset = getCharset(contentType);
        if (charset == null) {
            charset = FALLBACK_CHARSET;
        }

        StringBuilder content = new StringBuilder();
        try (InputStream in = conn.getInputStream();
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset))) {
            char[] buf = new char[1024];
            int n;
            // read until EOF or until the character limit is reached
            while (content.length() < MAX_READ_CHARS && (n = reader.read(buf, 0, buf.length)) != -1) {
                content.append(buf, 0, n);
            }
        }

        Matcher matcher = TITLE_TAG.matcher(content);
        if (!matcher.find()) {
            return null;
        }
        // replace any run of whitespace (including line feeds) and HTML brackets with a single space
        return matcher.group(1).replaceAll("[\\s\\<>]+", " ").trim();
    }

    /**
     * Reads the Content-Type response header.
     *
     * @param conn connection to read the header from
     * @return parsed header, or null when the response carries no Content-Type
     */
    private static ContentType getContentTypeHeader(URLConnection conn) {
        // getContentType() looks up the "content-type" header field, so the lookup
        // no longer depends on matching the literal spelling "Content-Type"
        String headerValue = conn.getContentType();
        return headerValue == null ? null : new ContentType(headerValue);
    }

    /**
     * Resolves the charset named in the header.
     *
     * @param contentType parsed header, may be null
     * @return the charset, or null when none is declared or the declared name is illegal or unsupported
     */
    private static Charset getCharset(ContentType contentType) {
        if (contentType == null || contentType.charsetName == null) {
            return null;
        }
        try {
            return Charset.forName(contentType.charsetName);
        } catch (IllegalArgumentException ignored) {
            // covers both an illegal charset name and an unsupported charset:
            // the caller falls back to the default in either case
            return null;
        }
    }

    /**
     * Class holds the content type and charset (if present)
     */
    private static final class ContentType {
        // the optional quote accepts the quoted-string form: charset="utf-8"
        private static final Pattern CHARSET_HEADER =
                Pattern.compile("charset=\"?([-_a-zA-Z0-9]+)", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

        private final String contentType;
        private final String charsetName;

        private ContentType(String headerValue) {
            if (headerValue == null) {
                throw new IllegalArgumentException("ContentType must be constructed with a not-null headerValue");
            }
            int n = headerValue.indexOf(';');
            if (n != -1) {
                // trimmed so that "text/html ; charset=..." is still recognised as HTML
                contentType = headerValue.substring(0, n).trim();
                Matcher matcher = CHARSET_HEADER.matcher(headerValue);
                charsetName = matcher.find() ? matcher.group(1) : null;
            } else {
                contentType = headerValue.trim();
                charsetName = null;
            }
        }
    }
}
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
jircbot.version=2.2.0
|
jircbot.version=2.2.1
|
||||||
|
|
||||||
jircbot.config=config.json
|
jircbot.config=config.json
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue