mirror of https://github.com/bvn13/JIrcBot.git
#7 fixed, fixed title retrieving from URL
parent
adeaff4a01
commit
6cbf281698
4
pom.xml
4
pom.xml
|
@ -6,7 +6,7 @@
|
|||
|
||||
<groupId>ru.bvn13</groupId>
|
||||
<artifactId>jircbot</artifactId>
|
||||
<version>2.2.0</version>
|
||||
<version>2.2.1</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<parent>
|
||||
|
@ -18,7 +18,7 @@
|
|||
|
||||
|
||||
<properties>
|
||||
<bot.version>2.2.0</bot.version>
|
||||
<bot.version>2.2.1</bot.version>
|
||||
|
||||
<java.version>1.8</java.version>
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@ import org.springframework.beans.factory.annotation.Autowired;
|
|||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Component;
|
||||
import ru.bvn13.jircbot.config.JircBotConfiguration;
|
||||
import ru.bvn13.jircbot.database.services.ChannelSettingsService;
|
||||
import ru.bvn13.jircbot.listeners.*;
|
||||
import ru.bvn13.jircbot.listeners.advices.AdviceListener;
|
||||
import ru.bvn13.jircbot.listeners.calculator.CalculatorListener;
|
||||
|
@ -43,14 +44,16 @@ public class JircBot extends ListenerAdapter {
|
|||
}
|
||||
|
||||
private JircBotConfiguration config;
|
||||
private ChannelSettingsService channelSettingsService;
|
||||
|
||||
private Map<String, PircBotX> bots = new HashMap<>();
|
||||
|
||||
|
||||
|
||||
@Autowired
|
||||
public JircBot(JircBotConfiguration config) {
|
||||
public JircBot(JircBotConfiguration config, ChannelSettingsService channelSettingsService) {
|
||||
this.config = config;
|
||||
this.channelSettingsService = channelSettingsService;
|
||||
}
|
||||
|
||||
|
||||
|
@ -137,7 +140,7 @@ public class JircBot extends ListenerAdapter {
|
|||
|
||||
.setServers(servers)
|
||||
.setAutoReconnect(true)
|
||||
.addAutoJoinChannels(c.getChannelsNames());
|
||||
.addAutoJoinChannels(channelSettingsService.getListeningChannels(c.getServer(), c.getChannelsNames()));
|
||||
|
||||
if (c.getBotPassword() != null && !c.getBotPassword().isEmpty()) {
|
||||
confBuilder.setNickservPassword(c.getBotPassword());
|
||||
|
|
|
@ -40,6 +40,9 @@ public class ChannelSettings extends BaseModel {
|
|||
@Column(nullable = false)
|
||||
private Boolean autoRejoinEnabled = false;
|
||||
|
||||
@Column(nullable = false, columnDefinition = "Boolean DEFAULT false")
|
||||
private Boolean joinOnStart = false;
|
||||
|
||||
@Column(nullable = false)
|
||||
private Boolean linkPreviewEnabled = false;
|
||||
|
||||
|
|
|
@ -1,11 +1,20 @@
|
|||
package ru.bvn13.jircbot.database.repositories;
|
||||
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.data.jpa.repository.Query;
|
||||
import org.springframework.data.repository.query.Param;
|
||||
import ru.bvn13.jircbot.database.entities.ChannelSettings;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by bvn13 on 01.02.2018.
|
||||
*/
|
||||
// Spring Data JPA repository for ChannelSettings entities (primary key type: Long).
public interface ChannelSettingsRepository extends JpaRepository<ChannelSettings, Long> {
|
||||

||||
/**
 * Query method with no explicit {@code @Query}; presumably Spring Data derives it
 * from the method name (first entity matching both server host and channel name).
 * NOTE(review): the result when nothing matches is not visible here - confirm.
 *
 * @param serverHost  server host to match
 * @param channelName channel name to match
 */
ChannelSettings getFirstByServerHostAndChannelName(String serverHost, String channelName);
|
||||

||||
/**
 * Selects every ChannelSettings row of the given server whose {@code joinOnStart}
 * flag is true.
 *
 * @param serverHost value bound to the {@code :serverHost} query parameter
 */
@Query(value = "select S from ChannelSettings S where S.joinOnStart = true and S.serverHost = :serverHost")
|
||||
List<ChannelSettings> getAllChannelsToAutoJoinByServerHost(@Param("serverHost") String serverHost);
|
||||

||||
}
|
||||
|
|
|
@ -5,6 +5,11 @@ import org.springframework.stereotype.Service;
|
|||
import ru.bvn13.jircbot.database.entities.ChannelSettings;
|
||||
import ru.bvn13.jircbot.database.repositories.ChannelSettingsRepository;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Created by bvn13 on 01.02.2018.
|
||||
*/
|
||||
|
@ -38,4 +43,14 @@ public class ChannelSettingsService {
|
|||
channelSettingsRepository.save(settings);
|
||||
}
|
||||
|
||||
public Set<String> getListeningChannels(String serverHost, List<String> defaultChannels) {
|
||||
Set<String> channels = channelSettingsRepository.getAllChannelsToAutoJoinByServerHost(serverHost).stream()
|
||||
.map(ChannelSettings::getChannelName)
|
||||
.collect(Collectors.toSet());
|
||||
if (channels.isEmpty()) {
|
||||
channels.addAll(defaultChannels);
|
||||
}
|
||||
return channels;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -92,6 +92,7 @@ public class AdminListener extends ImprovedListenerAdapter implements Descriptio
|
|||
.command("set")
|
||||
.description("set ON|OFF any of bot opportunity for channel\n\n"+
|
||||
"Opportunities: \n\n"+
|
||||
"login | autologin | join-on-start - auto login mode on startup\n"+
|
||||
"autorejoin | auto-rejoin - auto rejoin channel on kicking\n"+
|
||||
"bash | bashorg - bach.org quoting\n"+
|
||||
"deferredmessages | deferred-messages | tell - saving and delivering deferred messages\n"+
|
||||
|
@ -380,6 +381,10 @@ public class AdminListener extends ImprovedListenerAdapter implements Descriptio
|
|||
case "quiz":
|
||||
settings.setQuizEnabled(mode);
|
||||
break;
|
||||
case "join-on-start":
case "login":
case "autologin":
    settings.setJoinOnStart(mode);
    // Without this break control falls through into `default`, so the command was
    // rejected with "Setting ... not exist" even though the option was recognised.
    break;
|
||||
default:
|
||||
throw new RuntimeException("Setting " + set + " not exist");
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@ import ru.bvn13.jircbot.documentation.DescriptionProvided;
|
|||
import ru.bvn13.jircbot.documentation.DocumentationProvider;
|
||||
import ru.bvn13.jircbot.documentation.ListenerDescription;
|
||||
import ru.bvn13.jircbot.services.InternetAccessor;
|
||||
import ru.bvn13.jircbot.utilities.WebTitleExtractor;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.ArrayList;
|
||||
|
@ -25,8 +26,6 @@ import static ru.bvn13.jircbot.documentation.ListenerDescription.CommandDescript
|
|||
@Component
|
||||
public class LinkPreviewListener extends ImprovedListenerAdapter implements DescriptionProvided {
|
||||
|
||||
private InternetAccessor internetAccessor;
|
||||
|
||||
private static final Pattern REGEX = Pattern.compile("(?i)(?:(?:https?|ftp)://)(?:\\S+(?::\\S*)?@)?(?:(?!(?:10|127)(?:\\.\\d{1,3}){3})(?!(?:169\\.254|192\\.168)(?:\\.\\d{1,3}){2})(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,}))\\.?)(?::\\d{2,5})?(?:[/?#]\\S*)?");
|
||||
|
||||
private ChannelSettingsService channelSettingsService;
|
||||
|
@ -60,10 +59,11 @@ public class LinkPreviewListener extends ImprovedListenerAdapter implements Desc
|
|||
|
||||
List<String> links = findLink(event.getMessage());
|
||||
for (String link : links) {
|
||||
String info = parseLink(link);
|
||||
if (!info.isEmpty()) {
|
||||
//String info = parseLink(link);
|
||||
String title = WebTitleExtractor.getPageTitle(link);
|
||||
if (title != null && !title.isEmpty()) {
|
||||
//event.respond(info);
|
||||
event.getChannel().send().message(info);
|
||||
event.getChannel().send().message("TITLE: "+title);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -79,84 +79,6 @@ public class LinkPreviewListener extends ImprovedListenerAdapter implements Desc
|
|||
return links;
|
||||
}
|
||||
|
||||
|
||||
private String parseLink(String link) throws Exception {
|
||||
String content = internetAccessor.retrieveContentByLink(link);
|
||||
|
||||
String encoding = null; //getCharsetFromHeaders(content.toString());
|
||||
// if (encoding == null) {
|
||||
// encoding = getCharsetFromBody(content.toString());
|
||||
// }
|
||||
|
||||
String title = "";
|
||||
|
||||
// if (encoding != null && !encoding.isEmpty()) {
|
||||
// content = internetAccessor.retrieveContentByLinkWithEncoding(link, encoding);
|
||||
// }
|
||||
|
||||
title = content.substring(content.indexOf("<title>") + 7, content.indexOf("</title>"));
|
||||
|
||||
return "Title: "+title;
|
||||
}
|
||||
|
||||
|
||||
public String decodeTitle_buffered(String title, String encoding) throws IOException {
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
|
||||
Reader r = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(title.getBytes()), encoding));
|
||||
Writer w = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
|
||||
|
||||
char[] buffer=new char[4096];
|
||||
int len;
|
||||
while((len=r.read(buffer)) != -1) {
|
||||
w.write(buffer, 0, len);
|
||||
}
|
||||
r.close();
|
||||
w.flush();
|
||||
w.close();
|
||||
|
||||
return out.toString();
|
||||
}
|
||||
|
||||
/**
 * Encodes {@code title} as UTF-8 bytes and decodes those bytes with {@code encoding}.
 *
 * NOTE(review): this reinterprets UTF-8 bytes in another charset, which garbles
 * non-ASCII text unless {@code encoding} is UTF-8 - confirm this is intended.
 *
 * @param title    text to re-decode
 * @param encoding charset name used to decode the UTF-8 bytes
 * @return the re-decoded string
 * @throws UnsupportedEncodingException if {@code encoding} is not supported
 */
String decodeTitle(String title, String encoding) throws UnsupportedEncodingException {
|
||||
return new String(title.getBytes("UTF-8"), encoding);
|
||||
}
|
||||
|
||||
public String getCharsetFromHeaders(String contentType){
|
||||
if (contentType != null && contentType.toLowerCase().trim().contains("charset=")){
|
||||
String[] parts = contentType.toLowerCase().trim().split("=");
|
||||
if (parts.length > 0)
|
||||
return parts[1];
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static String getCharsetFromBody(String body) {
|
||||
if (body != null) {
|
||||
int headEnd = body.toLowerCase().trim().indexOf("</head>");
|
||||
|
||||
// return null if there's no head tags
|
||||
if (headEnd == -1)
|
||||
return null;
|
||||
|
||||
String body_head = body.toLowerCase().substring(0, headEnd);
|
||||
|
||||
Pattern p = Pattern.compile("charset=([\"\'a-z0-9A-Z-]+)");
|
||||
Matcher m = p.matcher(body_head);
|
||||
String str_match = "";
|
||||
if (m.find()) {
|
||||
str_match = m.toMatchResult().group(1);
|
||||
return str_match.replaceAll("[\"']", "");
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Setter annotated with {@code @Autowired}; stores the given accessor in the
 * {@code internetAccessor} field.
 *
 * @param internetAccessor accessor to store
 */
@Autowired
|
||||
public void setInternetAccessor(InternetAccessor internetAccessor) {
|
||||
this.internetAccessor = internetAccessor;
|
||||
}
|
||||
|
||||
@Autowired
|
||||
public void setChannelSettingsService(ChannelSettingsService channelSettingsService) {
|
||||
this.channelSettingsService = channelSettingsService;
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
package ru.bvn13.jircbot.utilities;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
 * Fetches a web page and extracts the text of its first {@code <title>} element.
 *
 * <p>Only the beginning of the response is read, so a title placed unusually deep in
 * the document is reported as missing.
 *
 * @author bvn13
 * @since 05.10.2019
 */
public class WebTitleExtractor {

    /*
     * CASE_INSENSITIVE accounts for sites that use uppercase title tags, DOTALL for
     * titles containing line feeds. The reluctant (.*?) stops at the FIRST closing tag;
     * a greedy match would run up to the last </title> of the page (e.g. SVG icon titles).
     */
    private static final Pattern TITLE_TAG =
            Pattern.compile("<title(?:\\s[^>]*)?>(.*?)</title>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /** Upper bound on the number of characters read from the response body. */
    private static final int MAX_READ_CHARS = 10 * 1024;

    /** Connect/read timeout so an unresponsive host cannot block the caller forever. */
    private static final int TIMEOUT_MILLIS = 10_000;

    /** Charset used when the response does not declare a usable one. */
    private static final Charset FALLBACK_CHARSET = Charset.forName("UTF-8");

    /**
     * Downloads the start of the page and returns its title.
     *
     * @param url the HTML page
     * @return title text with runs of whitespace and angle brackets collapsed to single
     *         spaces; {@code null} if the document isn't HTML or lacks a title tag
     * @throws IOException if the URL is malformed or the page cannot be read
     */
    public static String getPageTitle(String url) throws IOException {
        URL u = new URL(url);
        URLConnection conn = u.openConnection();
        conn.setConnectTimeout(TIMEOUT_MILLIS);
        conn.setReadTimeout(TIMEOUT_MILLIS);

        // A missing Content-Type header is tolerated; a non-HTML one is not.
        ContentType contentType = getContentTypeHeader(conn);
        if (contentType != null && !contentType.isHtml()) {
            return null;
        }

        Charset charset = getCharset(contentType);
        if (charset == null) {
            charset = FALLBACK_CHARSET;
        }

        try (InputStream in = conn.getInputStream();
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset))) {
            Matcher matcher = TITLE_TAG.matcher(readHead(reader));
            if (!matcher.find()) {
                return null;
            }
            // Collapse whitespace (including line feeds) and stray HTML brackets.
            return matcher.group(1).replaceAll("[\\s\\<>]+", " ").trim();
        }
    }

    /** Reads at most {@link #MAX_READ_CHARS} characters, or up to EOF if that comes first. */
    private static String readHead(BufferedReader reader) throws IOException {
        char[] buf = new char[1024];
        StringBuilder content = new StringBuilder();
        int remaining = MAX_READ_CHARS;
        while (remaining > 0) {
            int n = reader.read(buf, 0, Math.min(buf.length, remaining));
            if (n == -1) {
                break;
            }
            content.append(buf, 0, n);
            remaining -= n;
        }
        return content.toString();
    }

    /**
     * Reads the Content-Type of the response.
     *
     * @param conn connection to query
     * @return ContentType object representing the value of the Content-Type header,
     *         or {@code null} when the connection reports none
     */
    private static ContentType getContentTypeHeader(URLConnection conn) {
        // getContentType() avoids comparing raw header names, which servers are free
        // to send in any letter case.
        String headerValue = conn.getContentType();
        return headerValue == null ? null : new ContentType(headerValue);
    }

    /**
     * Resolves the declared charset.
     *
     * @return the charset named by the header, or {@code null} when none is declared
     *         or the name is illegal or unsupported on this JVM
     */
    private static Charset getCharset(ContentType contentType) {
        if (contentType == null || contentType.charsetName == null) {
            return null;
        }
        try {
            return Charset.isSupported(contentType.charsetName)
                    ? Charset.forName(contentType.charsetName)
                    : null;
        } catch (IllegalArgumentException ignored) {
            // Syntactically illegal charset name sent by the server: treat as undeclared.
            return null;
        }
    }

    /**
     * Class holds the content type and charset (if present)
     */
    private static final class ContentType {
        private static final Pattern CHARSET_HEADER =
                Pattern.compile("charset=\\s*[\"']?([-_a-zA-Z0-9.:+]+)", Pattern.CASE_INSENSITIVE);

        private final String contentType;
        private final String charsetName;

        private ContentType(String headerValue) {
            if (headerValue == null) {
                throw new IllegalArgumentException("ContentType must be constructed with a not-null headerValue");
            }
            int n = headerValue.indexOf(';');
            if (n != -1) {
                contentType = headerValue.substring(0, n).trim();
                Matcher matcher = CHARSET_HEADER.matcher(headerValue);
                charsetName = matcher.find() ? matcher.group(1) : null;
            } else {
                contentType = headerValue.trim();
                charsetName = null;
            }
        }

        /** Media types are case-insensitive, so compare accordingly. */
        private boolean isHtml() {
            return "text/html".equalsIgnoreCase(contentType);
        }
    }
}
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
jircbot.version=2.2.0
|
||||
jircbot.version=2.2.1
|
||||
|
||||
jircbot.config=config.json
|
||||
|
||||
|
|
Loading…
Reference in New Issue