From de5b3ad932a708f1294766cd9427394d2acc65b5 Mon Sep 17 00:00:00 2001 From: bvn13 Date: Wed, 16 Oct 2024 01:28:07 +0300 Subject: [PATCH] added web server in order to browse all feeds available --- pyproject.toml | 3 ++ src/app.py | 62 +++++++++++++++++++++++----------------- src/fetcher.py | 33 +++++++++++++++++++++ src/server.py | 38 ++++++++++++++++++++++++ src/templates/feed.html | 1 + src/templates/index.html | 17 +++++++++++ 6 files changed, 128 insertions(+), 26 deletions(-) create mode 100644 src/fetcher.py create mode 100644 src/server.py create mode 100644 src/templates/feed.html create mode 100644 src/templates/index.html diff --git a/pyproject.toml b/pyproject.toml index cabb888..27ed867 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,9 @@ readme = "README.md" python = "^3.12" requests = "^2.32.3" urllib3 = "^2.2.3" +tornado = "^6.4.1" +asyncio = "^3.4.3" +scheduler = "^0.8.7" [build-system] diff --git a/src/app.py b/src/app.py index 9ee69be..22c4832 100644 --- a/src/app.py +++ b/src/app.py @@ -1,10 +1,13 @@ import argparse +import datetime as dt import json +from logging import DEBUG +import time +import threading +from scheduler import Scheduler -import requests.exceptions -from requests import Session -from requests.adapters import HTTPAdapter -from urllib3.util import Retry +from fetcher import fetch +from server import start_server from src.logger import logger as l @@ -14,8 +17,16 @@ logger.info("Starting") parser = argparse.ArgumentParser(__name__) parser.add_argument('-s', '--settings', required=True, help='Settings JSON') parser.add_argument('-d', '--directory', required=True, help='Directory to store XMLs') +parser.add_argument('-i', '--check-interval', required=False, default=1, help='Interval to fetch XMLs (in minutes)') +parser.add_argument('-p', '--port', required=False, default=8001, help='Port to listen') +parser.add_argument('--debug', required=False, default=False, action=argparse.BooleanOptionalAction, help="Debug") args = parser.parse_args() +assert args.check_interval >= 1 + +if args.debug: + logger.setLevel(DEBUG) + logger.info("Loading settings from settings JSON") settings = [] with open(args.settings) as json_file: @@ -23,25 +34,24 @@ with open(args.settings) as json_file: assert type(settings) == list -for sets in settings: - logger.info(f"Working set: {sets}") - assert type(sets) == dict - assert 'src' in sets - assert 'rss' in sets - s = Session() - retries = Retry( - total=3, - backoff_factor=0.1, - status_forcelist=[502, 503, 504], - allowed_methods={'GET'}, - ) - s.mount(sets['src'], HTTPAdapter(max_retries=retries)) - try: - r = s.get(sets['src'], timeout=3, stream=True) - if r.status_code == 200: - logger.info(f"Saving to file: {sets['rss']}") - with open("{0}/{1}".format(args.directory, sets['rss']), 'wb') as rss: - for chunk in r: - rss.write(chunk) - except requests.exceptions.ConnectionError as e: - logger.warning(f"Unable to fetch {sets['src']}", e) \ No newline at end of file + +def _do_fetch(): + fetch(settings=settings, args=args) + +def _start_scheduling(): + logger.info('Setting Scheduler up') + schedule = Scheduler() + schedule.cyclic(dt.timedelta(seconds=args.check_interval*60), _do_fetch) + logger.info(schedule) + while True: + schedule.exec_jobs() + time.sleep(1) + +def _start_web_server(): + start_server(args=args) + + +if __name__ == "__main__": + t1 = threading.Thread(target=_start_scheduling) + t1.start() + _start_web_server() \ No newline at end of file diff --git a/src/fetcher.py b/src/fetcher.py new file mode 100644 index 0000000..f66727e --- /dev/null +++ b/src/fetcher.py @@ -0,0 +1,33 @@ +from argparse import Namespace + +import requests.exceptions +from requests import Session +from requests.adapters import HTTPAdapter +from urllib3.util import Retry +from src.logger import logger as l + + +def fetch(settings: list, args: Namespace) -> None: + logger = l.getChild(__name__) + for sets in settings: + logger.info(f"Working set: {sets}") + assert type(sets) == dict + assert 'src' in sets + assert 'rss' in sets + s = Session() + retries = Retry( + total=3, + backoff_factor=0.1, + status_forcelist=[502, 503, 504], + allowed_methods={'GET'}, + ) + s.mount(sets['src'], HTTPAdapter(max_retries=retries)) + try: + r = s.get(sets['src'], timeout=3, stream=True) + if r.status_code == 200: + logger.info(f"Saving to file: {sets['rss']}") + with open("{0}/{1}".format(args.directory, sets['rss']), 'wb') as rss: + for chunk in r: + rss.write(chunk) + except requests.exceptions.ConnectionError as e: + logger.warning(f"Unable to fetch {sets['src']}", e) diff --git a/src/server.py b/src/server.py new file mode 100644 index 0000000..841aacd --- /dev/null +++ b/src/server.py @@ -0,0 +1,38 @@ +import asyncio +import os, fnmatch +from argparse import Namespace +import tornado +from src.logger import logger as l + + +def start_server(args: Namespace) -> None: + logger = l.getChild(__name__) + + def _get_all_feeds(): + return [{ 'rss': f, 'file': f"/feeds/{f}" } for f in fnmatch.filter(os.listdir(args.directory), '*.xml')] + + class MainHandler(tornado.web.RequestHandler): + def set_default_headers(self): + self.set_header("Access-Control-Allow-Origin", "*") + self.set_header("Access-Control-Allow-Headers", "x-requested-with") + self.set_header('Access-Control-Allow-Methods', 'GET, OPTIONS') + + def get(self): + self.render("index.html", feeds=_get_all_feeds()) + + + async def start_web_server(): + logger.info(f"Starting web server on port {args.port}") + app = tornado.web.Application( + [ + (r"/", MainHandler), + (r'/feeds/(.*)', tornado.web.StaticFileHandler, {'path': args.directory}), + ], + template_path=os.path.join(os.path.dirname(__file__), "templates"), + static_path=args.directory, + debug=args.debug, + ) + app.listen(args.port) + await asyncio.Event().wait() + + asyncio.run(start_web_server()) \ No newline at end of file diff --git a/src/templates/feed.html b/src/templates/feed.html new file mode 100644 index 0000000..34b24b1 --- /dev/null +++ b/src/templates/feed.html @@ -0,0 +1 @@ +
  • {{ feed['rss'] }}
  • \ No newline at end of file diff --git a/src/templates/index.html b/src/templates/index.html new file mode 100644 index 0000000..9f0d88d --- /dev/null +++ b/src/templates/index.html @@ -0,0 +1,17 @@ + + + + + Tornado Chat Demo + + +
    +

    Feeds:

    +
      + {% for feed in feeds %} + {% module Template("feed.html", feed=feed) %} + {% end %} +
    +
    + + \ No newline at end of file