From 6faa845afd5763b9398d040fdaf974cbd791540b Mon Sep 17 00:00:00 2001 From: Fabio Manganiello Date: Tue, 25 Jun 2024 22:38:29 +0200 Subject: [PATCH] Migrated `/file` route. Streaming content from a Flask route wrapped into a Tornado route is a buffering nightmare. `/file` has now been migrated to a pure Tornado asynchronous route instead. --- platypush/backend/http/app/routes/file.py | 68 ---------- platypush/backend/http/app/streaming/_base.py | 1 + .../http/app/streaming/plugins/file.py | 121 ++++++++++++++++++ 3 files changed, 122 insertions(+), 68 deletions(-) delete mode 100644 platypush/backend/http/app/routes/file.py create mode 100644 platypush/backend/http/app/streaming/plugins/file.py diff --git a/platypush/backend/http/app/routes/file.py b/platypush/backend/http/app/routes/file.py deleted file mode 100644 index d392487aa2..0000000000 --- a/platypush/backend/http/app/routes/file.py +++ /dev/null @@ -1,68 +0,0 @@ -import os -import re -from typing import Generator - -from flask import Blueprint, abort, request -from flask.wrappers import Response - -from platypush.backend.http.app import template_folder -from platypush.backend.http.app.utils import authenticate, logger -from platypush.utils import get_mime_type - -file = Blueprint('file', __name__, template_folder=template_folder) - -# Declare routes list -__routes__ = [ - file, -] - - -@file.route('/file', methods=['GET', 'HEAD']) -@authenticate() -def get_file_route(): - """ - Endpoint to read the content of a file on the server. - """ - - def read_file(path: str) -> Generator[bytes, None, None]: - with open(path, 'rb') as f: - yield from iter(lambda: f.read(4096), b'') - - path = os.sep + os.path.join( - *[ - token - for token in re.sub( - r'^\.\./', - '', - re.sub( - r'^\./', - '', - request.args.get('path', '').lstrip(os.sep).lstrip(' ') or '', - ), - ).split(os.sep) - if token - ] - ) - - logger().debug('Received file read request for %r', request.path) - - if not os.path.isfile(path): - logger().warning('File not found: %r', path) - abort(404, 'File not found') - - try: - headers = { - 'Content-Length': str(os.path.getsize(path)), - 'Content-Type': (get_mime_type(path) or 'application/octet-stream'), - } - - if request.method == 'HEAD': - return Response(status=200, headers=headers) - - return read_file(path), 200, headers - except PermissionError: - logger().warning('Permission denied to read file %r', path) - abort(403, 'Permission denied') - - -# vim:sw=4:ts=4:et: diff --git a/platypush/backend/http/app/streaming/_base.py b/platypush/backend/http/app/streaming/_base.py index aea3d58d5a..862550324f 100644 --- a/platypush/backend/http/app/streaming/_base.py +++ b/platypush/backend/http/app/streaming/_base.py @@ -42,6 +42,7 @@ class StreamingRoute(RequestHandler, PubSubMixin, ABC): Make sure that errors are always returned in JSON format. """ self.set_header("Content-Type", "application/json") + self.set_status(status_code) self.finish( json.dumps( {"status": status_code, "error": error or responses.get(status_code)} diff --git a/platypush/backend/http/app/streaming/plugins/file.py b/platypush/backend/http/app/streaming/plugins/file.py new file mode 100644 index 0000000000..b841bfa57f --- /dev/null +++ b/platypush/backend/http/app/streaming/plugins/file.py @@ -0,0 +1,121 @@ +import os +from contextlib import contextmanager +from datetime import datetime as dt +from typing import Optional, Tuple + +from tornado.web import stream_request_body + +from platypush.utils import get_mime_type + +from .. import StreamingRoute + + +@stream_request_body +class FileRoute(StreamingRoute): + """ + Generic route to read the content of a file on the server. + """ + + BUFSIZE = 1024 + + @classmethod + def path(cls) -> str: + """ + Route: GET /file?path=[&download] + """ + return r"^/file$" + + @property + def download(self) -> bool: + return 'download' in self.request.arguments + + @property + def file_path(self) -> str: + return os.path.expanduser( + self.request.arguments.get('path', [b''])[0].decode('utf-8') + ) + + @property + def file_size(self) -> int: + return os.path.getsize(self.file_path) + + @property + def range(self) -> Tuple[Optional[int], Optional[int]]: + range_hdr = self.request.headers.get('Range') + if not range_hdr: + return None, None + + start, end = range_hdr.split('=')[-1].split('-') + start = int(start) if start else 0 + end = int(end) if end else self.file_size - 1 + return start, end + + def set_headers(self): + self.set_header('Content-Length', str(os.path.getsize(self.file_path))) + self.set_header( + 'Content-Type', get_mime_type(self.file_path) or 'application/octet-stream' + ) + self.set_header('Accept-Ranges', 'bytes') + self.set_header( + 'Last-Modified', + dt.fromtimestamp(os.path.getmtime(self.file_path)).strftime( + '%a, %d %b %Y %H:%M:%S GMT' + ), + ) + + if self.download: + self.set_header( + 'Content-Disposition', + f'attachment; filename="{os.path.basename(self.file_path)}"', + ) + + if self.range[0] is not None: + start, end = self.range + self.set_header( + 'Content-Range', + f'bytes {start}-{end}/{self.file_size}', + ) + self.set_status(206) + + @contextmanager + def _serve(self): + path = self.file_path + if not path: + self.write_error(400, 'Missing path argument') + return + + self.logger.debug('Received file read request for %r', path) + + try: + with open(path, 'rb') as f: + self.set_headers() + yield f + except FileNotFoundError: + self.write_error(404, 'File not found') + yield + return + except PermissionError: + self.write_error(403, 'Permission denied') + yield + return + except Exception as e: + self.write_error(500, str(e)) + yield + return + + self.finish() + + def get(self) -> None: + with self._serve() as f: + if f: + while True: + chunk = f.read(self.BUFSIZE) + if not chunk: + break + + self.write(chunk) + self.flush() + + def head(self) -> None: + with self._serve(): + pass