added a web server to browse all available feeds

bvn13 2024-10-16 01:28:07 +03:00
parent cf677c0787
commit de5b3ad932
6 changed files with 128 additions and 26 deletions

pyproject.toml

@@ -9,6 +9,9 @@ readme = "README.md"
 python = "^3.12"
 requests = "^2.32.3"
 urllib3 = "^2.2.3"
+tornado = "^6.4.1"
+asyncio = "^3.4.3"
+scheduler = "^0.8.7"
 [build-system]

View File

@@ -1,10 +1,13 @@
 import argparse
+import datetime as dt
 import json
-import requests.exceptions
-from requests import Session
-from requests.adapters import HTTPAdapter
-from urllib3.util import Retry
+from logging import DEBUG
+import time
+import threading
+from scheduler import Scheduler
+from fetcher import fetch
+from server import start_server
 from src.logger import logger as l
@@ -14,8 +17,16 @@ logger.info("Starting")
 parser = argparse.ArgumentParser(__name__)
 parser.add_argument('-s', '--settings', required=True, help='Settings JSON')
 parser.add_argument('-d', '--directory', required=True, help='Directory to store XMLs')
+parser.add_argument('-i', '--check-interval', required=False, default=1, help='Interval to fetch XMLs (in minutes)')
+parser.add_argument('-p', '--port', required=False, default=8001, help='Port to listen')
+parser.add_argument('--debug', required=False, default=False, action=argparse.BooleanOptionalAction, help="Debug")
 args = parser.parse_args()
+assert args.check_interval >= 1
+
+if args.debug:
+    logger.setLevel(DEBUG)
+
 logger.info("Loading settings from settings JSON")
 settings = []
 with open(args.settings) as json_file:
@@ -23,25 +34,24 @@ with open(args.settings) as json_file:
 assert type(settings) == list
-for sets in settings:
-    logger.info(f"Working set: {sets}")
-    assert type(sets) == dict
-    assert 'src' in sets
-    assert 'rss' in sets
-    s = Session()
-    retries = Retry(
-        total=3,
-        backoff_factor=0.1,
-        status_forcelist=[502, 503, 504],
-        allowed_methods={'GET'},
-    )
-    s.mount(sets['src'], HTTPAdapter(max_retries=retries))
-    try:
-        r = s.get(sets['src'], timeout=3, stream=True)
-        if r.status_code == 200:
-            logger.info(f"Saving to file: {sets['rss']}")
-            with open("{0}/{1}".format(args.directory, sets['rss']), 'wb') as rss:
-                for chunk in r:
-                    rss.write(chunk)
-    except requests.exceptions.ConnectionError as e:
-        logger.warning(f"Unable to fetch {sets['src']}", e)
+
+def _do_fetch():
+    fetch(settings=settings, args=args)
+
+def _start_scheduling():
+    logger.info('Setting Scheduler up')
+    schedule = Scheduler()
+    schedule.cyclic(dt.timedelta(seconds=args.check_interval*60), _do_fetch)
+    logger.info(schedule)
+    while True:
+        schedule.exec_jobs()
+        time.sleep(1)
+
+def _start_web_server():
+    start_server(args=args)
+
+
+if __name__ == "__main__":
+    t1 = threading.Thread(target=_start_scheduling)
+    t1.start()
+    _start_web_server()

src/fetcher.py (new file, 33 lines)

@@ -0,0 +1,33 @@
from argparse import Namespace
import requests.exceptions
from requests import Session
from requests.adapters import HTTPAdapter
from urllib3.util import Retry
from src.logger import logger as l


def fetch(settings: list, args: Namespace) -> None:
    logger = l.getChild(__name__)
    for sets in settings:
        logger.info(f"Working set: {sets}")
        assert type(sets) == dict
        assert 'src' in sets
        assert 'rss' in sets
        s = Session()
        retries = Retry(
            total=3,
            backoff_factor=0.1,
            status_forcelist=[502, 503, 504],
            allowed_methods={'GET'},
        )
        s.mount(sets['src'], HTTPAdapter(max_retries=retries))
        try:
            r = s.get(sets['src'], timeout=3, stream=True)
            if r.status_code == 200:
                logger.info(f"Saving to file: {sets['rss']}")
                with open("{0}/{1}".format(args.directory, sets['rss']), 'wb') as rss:
                    for chunk in r:
                        rss.write(chunk)
        except requests.exceptions.ConnectionError as e:
            logger.warning(f"Unable to fetch {sets['src']}", e)

src/server.py (new file, 38 lines)

@@ -0,0 +1,38 @@
import asyncio
import os, fnmatch
from argparse import Namespace
import tornado
from src.logger import logger as l


def start_server(args: Namespace) -> None:
    logger = l.getChild(__name__)

    def _get_all_feeds():
        return [{ 'rss': f, 'file': f"/feeds/{f}" } for f in fnmatch.filter(os.listdir(args.directory), '*.xml')]

    class MainHandler(tornado.web.RequestHandler):
        def set_default_headers(self):
            self.set_header("Access-Control-Allow-Origin", "*")
            self.set_header("Access-Control-Allow-Headers", "x-requested-with")
            self.set_header('Access-Control-Allow-Methods', 'GET, OPTIONS')

        def get(self):
            self.render("index.html", feeds=_get_all_feeds())

    async def start_web_server():
        logger.info(f"Starting web server on port {args.port}")
        app = tornado.web.Application(
            [
                (r"/", MainHandler),
                (r'/feeds/(.*)', tornado.web.StaticFileHandler, {'path': args.directory}),
            ],
            template_path=os.path.join(os.path.dirname(__file__), "templates"),
            static_path=args.directory,
            debug=args.debug,
        )
        app.listen(args.port)
        await asyncio.Event().wait()

    asyncio.run(start_web_server())
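
Once the service is running, the two new routes can be sanity-checked with requests (already a dependency); this sketch assumes the default port 8001 and a previously fetched file named example.xml, both hypothetical:

import requests

# "/" renders index.html with the list of *.xml files found in the feeds directory
index_page = requests.get("http://localhost:8001/")
print(index_page.status_code)

# "/feeds/<name>" is served by StaticFileHandler straight from that directory
feed = requests.get("http://localhost:8001/feeds/example.xml")
print(feed.status_code, len(feed.content))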

src/templates/feed.html (new file, 1 line)

@@ -0,0 +1 @@
<li id="m{{ feed['rss'] }}"><a href="{% module linkify(feed['file']) %}">{{ feed['rss'] }}</a></li>

src/templates/index.html (new file, 17 lines)

@@ -0,0 +1,17 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Tornado Chat Demo</title>
</head>
<body>
<div id="body">
<p>Feeds:</p>
<ol>
{% for feed in feeds %}
{% module Template("feed.html", feed=feed) %}
{% end %}
</ol>
</div>
</body>
</html>