added web server in order to browse all available feeds
parent cf677c0787
commit de5b3ad932
pyproject.toml
@@ -9,6 +9,9 @@ readme = "README.md"
 python = "^3.12"
 requests = "^2.32.3"
 urllib3 = "^2.2.3"
+tornado = "^6.4.1"
+asyncio = "^3.4.3"
+scheduler = "^0.8.7"
 
 [build-system]
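Note on the new dependencies: Python 3.12 already ships asyncio in the standard library, and the PyPI asyncio package pinned here is a 2015-era backport, so tornado and scheduler are the functional additions. A quick import sanity check, as a sketch against an environment installed from this pyproject:

import asyncio  # resolves to the stdlib module on 3.12, regardless of the PyPI pin
import tornado
from scheduler import Scheduler

print(tornado.version)  # expect a 6.4.x release
print(Scheduler())      # prints an empty job table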
src/app.py (62 changed lines)
@@ -1,10 +1,13 @@
 import argparse
+import datetime as dt
 import json
-import requests.exceptions
-from requests import Session
-from requests.adapters import HTTPAdapter
-from urllib3.util import Retry
+from logging import DEBUG
+import time
+import threading
+from scheduler import Scheduler
+
+from fetcher import fetch
+from server import start_server
 from src.logger import logger as l
 
 
@@ -14,8 +17,16 @@ logger.info("Starting")
 parser = argparse.ArgumentParser(__name__)
 parser.add_argument('-s', '--settings', required=True, help='Settings JSON')
 parser.add_argument('-d', '--directory', required=True, help='Directory to store XMLs')
+parser.add_argument('-i', '--check-interval', required=False, default=1, type=int, help='Interval to fetch XMLs (in minutes)')
+parser.add_argument('-p', '--port', required=False, default=8001, type=int, help='Port to listen on')
+parser.add_argument('--debug', required=False, default=False, action=argparse.BooleanOptionalAction, help="Debug")
 args = parser.parse_args()
 
+assert args.check_interval >= 1
+
+if args.debug:
+    logger.setLevel(DEBUG)
+
 logger.info("Loading settings from settings JSON")
 settings = []
 with open(args.settings) as json_file:
@@ -23,25 +34,24 @@ with open(args.settings) as json_file:
 
 assert type(settings) == list
 
-for sets in settings:
-    logger.info(f"Working set: {sets}")
-    assert type(sets) == dict
-    assert 'src' in sets
-    assert 'rss' in sets
-    s = Session()
-    retries = Retry(
-        total=3,
-        backoff_factor=0.1,
-        status_forcelist=[502, 503, 504],
-        allowed_methods={'GET'},
-    )
-    s.mount(sets['src'], HTTPAdapter(max_retries=retries))
-    try:
-        r = s.get(sets['src'], timeout=3, stream=True)
-        if r.status_code == 200:
-            logger.info(f"Saving to file: {sets['rss']}")
-            with open("{0}/{1}".format(args.directory, sets['rss']), 'wb') as rss:
-                for chunk in r:
-                    rss.write(chunk)
-    except requests.exceptions.ConnectionError as e:
-        logger.warning(f"Unable to fetch {sets['src']}", e)
+def _do_fetch():
+    fetch(settings=settings, args=args)
+
+def _start_scheduling():
+    logger.info('Setting Scheduler up')
+    schedule = Scheduler()
+    schedule.cyclic(dt.timedelta(seconds=args.check_interval*60), _do_fetch)
+    logger.info(schedule)
+    while True:
+        schedule.exec_jobs()
+        time.sleep(1)
+
+def _start_web_server():
+    start_server(args=args)
+
+
+if __name__ == "__main__":
+    t1 = threading.Thread(target=_start_scheduling)
+    t1.start()
+    _start_web_server()
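app.py now delegates fetching to fetcher.fetch and runs it on a scheduling thread while the Tornado server occupies the main thread. A minimal standalone sketch of the same cyclic-polling pattern used in _start_scheduling (the job body is a hypothetical stand-in):

import datetime as dt
import time

from scheduler import Scheduler

def job():
    print("tick")  # stand-in for fetch(settings=..., args=...)

schedule = Scheduler()
schedule.cyclic(dt.timedelta(minutes=1), job)  # re-queue job every minute
while True:
    schedule.exec_jobs()  # run whatever is due
    time.sleep(1)         # poll once per second

One caveat: the thread started under __main__ is non-daemon, so the scheduling loop keeps the process alive even if the web server exits; threading.Thread(..., daemon=True) would tie its lifetime to the main thread.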
src/fetcher.py (new file, 33 lines)
@@ -0,0 +1,33 @@
+from argparse import Namespace
+
+import requests.exceptions
+from requests import Session
+from requests.adapters import HTTPAdapter
+from urllib3.util import Retry
+from src.logger import logger as l
+
+
+def fetch(settings: list, args: Namespace) -> None:
+    logger = l.getChild(__name__)
+    for sets in settings:
+        logger.info(f"Working set: {sets}")
+        assert type(sets) == dict
+        assert 'src' in sets
+        assert 'rss' in sets
+        s = Session()
+        retries = Retry(
+            total=3,
+            backoff_factor=0.1,
+            status_forcelist=[502, 503, 504],
+            allowed_methods={'GET'},
+        )
+        s.mount(sets['src'], HTTPAdapter(max_retries=retries))
+        try:
+            r = s.get(sets['src'], timeout=3, stream=True)
+            if r.status_code == 200:
+                logger.info(f"Saving to file: {sets['rss']}")
+                with open("{0}/{1}".format(args.directory, sets['rss']), 'wb') as rss:
+                    for chunk in r:
+                        rss.write(chunk)
+        except requests.exceptions.ConnectionError as e:
+            logger.warning(f"Unable to fetch {sets['src']}: {e}")
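fetch() asserts that each settings entry is a dict carrying a src URL and an rss output filename. A hypothetical settings file matching that shape (URLs and filenames are placeholders, not taken from the commit):

import json

settings = [
    {"src": "https://example.com/feed.xml", "rss": "example.xml"},
    {"src": "https://blog.example.org/rss", "rss": "blog.xml"},
]
with open("settings.json", "w") as f:
    json.dump(settings, f, indent=2)

Since HTTPAdapter(max_retries=retries) is mounted with sets['src'] itself as the prefix, the retry policy applies exactly to the one s.get(sets['src'], ...) call that follows.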
src/server.py (new file, 38 lines)
@@ -0,0 +1,38 @@
+import asyncio
+import os, fnmatch
+from argparse import Namespace
+import tornado
+from src.logger import logger as l
+
+
+def start_server(args: Namespace) -> None:
+    logger = l.getChild(__name__)
+
+    def _get_all_feeds():
+        return [{'rss': f, 'file': f"/feeds/{f}"} for f in fnmatch.filter(os.listdir(args.directory), '*.xml')]
+
+    class MainHandler(tornado.web.RequestHandler):
+        def set_default_headers(self):
+            self.set_header("Access-Control-Allow-Origin", "*")
+            self.set_header("Access-Control-Allow-Headers", "x-requested-with")
+            self.set_header('Access-Control-Allow-Methods', 'GET, OPTIONS')
+
+        def get(self):
+            self.render("index.html", feeds=_get_all_feeds())
+
+    async def start_web_server():
+        logger.info(f"Starting web server on port {args.port}")
+        app = tornado.web.Application(
+            [
+                (r"/", MainHandler),
+                (r'/feeds/(.*)', tornado.web.StaticFileHandler, {'path': args.directory}),
+            ],
+            template_path=os.path.join(os.path.dirname(__file__), "templates"),
+            static_path=args.directory,
+            debug=args.debug,
+        )
+        app.listen(args.port)
+        await asyncio.Event().wait()
+
+    asyncio.run(start_web_server())
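Once start_server is running, / renders the feed index and /feeds/<name> serves saved XML straight out of args.directory through StaticFileHandler, while await asyncio.Event().wait() keeps the IO loop alive indefinitely. A hypothetical smoke test against the default port (assumes a feed named example.xml was already fetched):

import requests

index = requests.get("http://localhost:8001/", timeout=3)
print(index.status_code)  # 200, body is the rendered index.html

feed = requests.get("http://localhost:8001/feeds/example.xml", timeout=3)
print(feed.status_code, len(feed.content))  # 200 and the size of the saved file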
src/templates/feed.html (new file, 1 line)
@@ -0,0 +1 @@
+<li id="m{{ feed['rss'] }}"><a href="{{ feed['file'] }}">{{ feed['rss'] }}</a></li>
src/templates/index.html (new file, 17 lines)
@@ -0,0 +1,17 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta charset="UTF-8">
+    <title>Feed Browser</title>
+  </head>
+  <body>
+    <div id="body">
+      <p>Feeds:</p>
+      <ol>
+        {% for feed in feeds %}
+          {% module Template("feed.html", feed=feed) %}
+        {% end %}
+      </ol>
+    </div>
+  </body>
+</html>