From 2df5e2aab67dc9d108f7dcad0e6326a17bff1357 Mon Sep 17 00:00:00 2001 From: rifatul islam Date: Thu, 17 Oct 2024 20:28:20 -0400 Subject: [PATCH 1/4] Fixed Issue #3369: Improved request body parsing in HTTPServerRequest and RequestHandler. Enhanced support for JSON, form-encoded, and multipart data, including file uploads. Updated unit tests to cover all scenarios, ensuring robust handling of requests. --- tornado/test/test_webmiddleware.py | 89 ++++++++++++++++++++ tornado/webmiddleware.py | 131 +++++++++++++++++++++++++++++ 2 files changed, 220 insertions(+) create mode 100644 tornado/test/test_webmiddleware.py create mode 100644 tornado/webmiddleware.py diff --git a/tornado/test/test_webmiddleware.py b/tornado/test/test_webmiddleware.py new file mode 100644 index 0000000000..6504672e60 --- /dev/null +++ b/tornado/test/test_webmiddleware.py @@ -0,0 +1,89 @@ +import json +from tornado.testing import AsyncHTTPTestCase +from tornado.web import Application, RequestHandler +from tornado.httputil import HTTPServerRequest +from tornado.web import RequestHandler +from tornado.webmiddleware import RequestParsingMiddleware + +class TestHandler(RequestHandler): + def prepare(self): + self.parsed_body = None + self._apply_middlewares() + + def _apply_middlewares(self): + middlewares = [RequestParsingMiddleware()] + for middleware in middlewares: + middleware.process_request(self) + + def post(self): + self.write(self.parsed_body) + +class MiddlewareTest(AsyncHTTPTestCase): + def get_app(self): + return Application([ + (r"/test", TestHandler), + ]) + + def test_json_parsing(self): + response = self.fetch("/test", method="POST", body=json.dumps({"key": "value"}), headers={"Content-Type": "application/json"}) + self.assertEqual(response.code, 200) + self.assertEqual(json.loads(response.body), {"key": "value"}) + + def test_form_parsing(self): + body = "key=value" + response = self.fetch("/test", method="POST", body=body, headers={"Content-Type": "application/x-www-form-urlencoded"}) + self.assertEqual(response.code, 200) + + # Adjusted expected response to match middleware's output structure + self.assertEqual(json.loads(response.body), { + "arguments": { + "key": ["value"] + }, + "files": {} + }) + + def test_multipart_parsing_with_file(self): + # Create a mock file content + file_content = b"This is a test file." + file_name = "test_file.txt" + + # Define the boundary + boundary = "----WebKitFormBoundary7MA4YWxkTrZu0gW" + + # Create the multipart body + body = ( + f"--{boundary}\r\n" + f"Content-Disposition: form-data; name=\"key\"\r\n\r\n" + "value\r\n" + f"--{boundary}\r\n" + f"Content-Disposition: form-data; name=\"file\"; filename=\"{file_name}\"\r\n" + f"Content-Type: text/plain\r\n\r\n" + ) + body += file_content.decode('utf-8') + f"\r\n--{boundary}--\r\n" + + headers = { + "Content-Type": f"multipart/form-data; boundary={boundary}" + } + + # Send the request + response = self.fetch("/test", method="POST", body=body.encode('utf-8'), headers=headers) + + # Assert response code + self.assertEqual(response.code, 200) + + # Load the parsed response body + parsed_body = json.loads(response.body) + + # Assert the file data and form data + self.assertEqual(parsed_body["arguments"], {"key": ["value"]}) + self.assertEqual(len(parsed_body["files"]["file"]), 1) + + uploaded_file = parsed_body["files"]["file"][0] + self.assertEqual(uploaded_file["filename"], file_name) + self.assertEqual(uploaded_file["body"], file_content.decode('utf-8')) + self.assertEqual(uploaded_file["content_type"], "text/plain") + + +if __name__ == "__main__": + import unittest + unittest.main() \ No newline at end of file diff --git a/tornado/webmiddleware.py b/tornado/webmiddleware.py new file mode 100644 index 0000000000..2fa681be5c --- /dev/null +++ b/tornado/webmiddleware.py @@ -0,0 +1,131 @@ +import json +from typing import Any, Dict, List, Optional +from tornado.httputil import HTTPServerRequest +from tornado.escape import json_decode +from tornado.httputil import parse_body_arguments + +class Middleware: + def process_request(self, handler: Any) -> None: # Update type hint + raise NotImplementedError + +class RequestParsingMiddleware(Middleware): + """ + Middleware to parse the request body based on the Content-Type header. + + This middleware class processes incoming HTTP requests to extract and + parse the body content according to the specified Content-Type. + It supports multiple formats, including: + + - JSON: Parses the body as a JSON object when the Content-Type is + 'application/json'. The resulting data structure is made accessible + via the `parsed_body` attribute of the request handler. + + - Form Data: Handles URL-encoded form data when the Content-Type is + 'application/x-www-form-urlencoded'. It converts the body into a + dictionary format where each key corresponds to form fields and + the values are lists of field values. + + - Multipart Data: Processes multipart form data (e.g., file uploads) + when the Content-Type is 'multipart/form-data'. This is particularly + useful for handling file uploads alongside other form fields. The + parsed data will contain both regular arguments and files. + + Attributes: + None + + Methods: + process_request(handler): Analyzes the Content-Type of the incoming + request and calls the appropriate parsing method to populate the + `parsed_body` attribute of the request handler. + + Example Usage: + In a Tornado application, you can use the `RequestParsingMiddleware` + to simplify handling different types of request bodies. Below is an + example implementation: + + ```python + import tornado.ioloop + import tornado.web + import json + from tornado.webmiddleware import RequestParsingMiddleware + + class MainHandler(tornado.web.RequestHandler): + def prepare(self): + self.parsed_body = None + self._apply_middlewares() + + def _apply_middlewares(self): + middlewares = [RequestParsingMiddleware()] + for middleware in middlewares: + middleware.process_request(self) + + def post(self): + # Respond with the parsed body as JSON + self.set_header("Content-Type", "application/json") + self.write(json.dumps(self.parsed_body)) + + def make_app(): + return tornado.web.Application([ + (r"/", MainHandler), + ]) + + if __name__ == "__main__": + app = make_app() + app.listen(8888) + tornado.ioloop.IOLoop.current().start() + ``` + + In this example, the `MainHandler` prepares for requests by applying the + `RequestParsingMiddleware`, allowing it to handle JSON, form data, + and multipart data seamlessly. When a POST request is made to the root + endpoint, the parsed body is returned as a JSON response. + + Note: This middleware is intended to be used in conjunction with + request handlers in a Tornado web application. It assumes that + the request body will be available for parsing. + """ + + def process_request(self, handler: Any) -> None: + + content_type = handler.request.headers.get("Content-Type", "") + if content_type.startswith("application/json"): + handler.parsed_body = self._parse_json(handler.request) + elif content_type.startswith("application/x-www-form-urlencoded") or content_type.startswith("multipart/form-data"): + handler.parsed_body = self._parse_form_or_multipart(handler.request) + else: + handler.parsed_body = None + + def _parse_json(self, request: HTTPServerRequest) -> Any: + try: + return json_decode(request.body) + except json.JSONDecodeError: + return None + + def _parse_form_or_multipart(self, request: HTTPServerRequest) -> Dict[str, Any]: + arguments = {} + files = {} + + # Use Tornado's built-in function to parse body arguments and files + parse_body_arguments( + request.headers.get("Content-Type", ""), + request.body, + arguments, + files, + headers=request.headers + ) + + parsed_data = { + "arguments": { + k: [v.decode('utf-8') if isinstance(v, bytes) else v for v in values] + for k, values in arguments.items() + }, + "files": { + k: [{ + "filename": f.filename, + "body": f.body.decode('utf-8') if f.body else None, + "content_type": f.content_type + } for f in file_list] + for k, file_list in files.items() + } + } + return parsed_data \ No newline at end of file From 0162dd1ef6b87e8089c7a10da416dfe743b059e8 Mon Sep 17 00:00:00 2001 From: rifatul islam Date: Fri, 18 Oct 2024 07:37:47 -0400 Subject: [PATCH 2/4] working test for 3 all parts --- tornado/test/test_webmiddleware.py | 12 +++++++----- tornado/webmiddleware.py | 8 ++++++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tornado/test/test_webmiddleware.py b/tornado/test/test_webmiddleware.py index 6504672e60..e6abf0f014 100644 --- a/tornado/test/test_webmiddleware.py +++ b/tornado/test/test_webmiddleware.py @@ -1,8 +1,7 @@ import json +import base64 from tornado.testing import AsyncHTTPTestCase from tornado.web import Application, RequestHandler -from tornado.httputil import HTTPServerRequest -from tornado.web import RequestHandler from tornado.webmiddleware import RequestParsingMiddleware class TestHandler(RequestHandler): @@ -16,7 +15,8 @@ def _apply_middlewares(self): middleware.process_request(self) def post(self): - self.write(self.parsed_body) + self.set_header("Content-Type", "application/json") + self.write(json.dumps(self.parsed_body)) # Return parsed body as JSON class MiddlewareTest(AsyncHTTPTestCase): def get_app(self): @@ -80,10 +80,12 @@ def test_multipart_parsing_with_file(self): uploaded_file = parsed_body["files"]["file"][0] self.assertEqual(uploaded_file["filename"], file_name) - self.assertEqual(uploaded_file["body"], file_content.decode('utf-8')) + + # Compare base64-encoded file content + self.assertEqual(uploaded_file["body"], base64.b64encode(file_content).decode('utf-8')) self.assertEqual(uploaded_file["content_type"], "text/plain") if __name__ == "__main__": import unittest - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tornado/webmiddleware.py b/tornado/webmiddleware.py index 2fa681be5c..7ece4706c5 100644 --- a/tornado/webmiddleware.py +++ b/tornado/webmiddleware.py @@ -3,6 +3,7 @@ from tornado.httputil import HTTPServerRequest from tornado.escape import json_decode from tornado.httputil import parse_body_arguments +import base64 class Middleware: def process_request(self, handler: Any) -> None: # Update type hint @@ -122,10 +123,13 @@ def _parse_form_or_multipart(self, request: HTTPServerRequest) -> Dict[str, Any] "files": { k: [{ "filename": f.filename, - "body": f.body.decode('utf-8') if f.body else None, + "body": base64.b64encode(f.body).decode('utf-8') if f.body else None, # Encode file body to base64 "content_type": f.content_type } for f in file_list] for k, file_list in files.items() } } - return parsed_data \ No newline at end of file + return parsed_data + + + From 820357955c9216a73058cfca962261efab68bdba Mon Sep 17 00:00:00 2001 From: rifatul islam Date: Fri, 18 Oct 2024 10:08:52 -0400 Subject: [PATCH 3/4] refactored middleware and test --- tornado/test/test_webmiddleware.py | 10 +++++--- tornado/webmiddleware.py | 40 ++++++++++++++++-------------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/tornado/test/test_webmiddleware.py b/tornado/test/test_webmiddleware.py index e6abf0f014..78f7b5bc02 100644 --- a/tornado/test/test_webmiddleware.py +++ b/tornado/test/test_webmiddleware.py @@ -15,6 +15,12 @@ def _apply_middlewares(self): middleware.process_request(self) def post(self): + # Convert any bytes in parsed_body to base64 before responding + if isinstance(self.parsed_body, dict): + for file_key, files in self.parsed_body.get("files", {}).items(): + for file in files: + file['body'] = base64.b64encode(file['body']).decode('utf-8') # Encode to base64 + self.set_header("Content-Type", "application/json") self.write(json.dumps(self.parsed_body)) # Return parsed body as JSON @@ -34,7 +40,6 @@ def test_form_parsing(self): response = self.fetch("/test", method="POST", body=body, headers={"Content-Type": "application/x-www-form-urlencoded"}) self.assertEqual(response.code, 200) - # Adjusted expected response to match middleware's output structure self.assertEqual(json.loads(response.body), { "arguments": { "key": ["value"] @@ -82,10 +87,9 @@ def test_multipart_parsing_with_file(self): self.assertEqual(uploaded_file["filename"], file_name) # Compare base64-encoded file content - self.assertEqual(uploaded_file["body"], base64.b64encode(file_content).decode('utf-8')) + self.assertEqual(uploaded_file["body"], base64.b64encode(file_content).decode('utf-8')) # Compare with base64 self.assertEqual(uploaded_file["content_type"], "text/plain") - if __name__ == "__main__": import unittest unittest.main() diff --git a/tornado/webmiddleware.py b/tornado/webmiddleware.py index 7ece4706c5..8345279bb1 100644 --- a/tornado/webmiddleware.py +++ b/tornado/webmiddleware.py @@ -1,9 +1,8 @@ import json -from typing import Any, Dict, List, Optional +from typing import Any, Dict from tornado.httputil import HTTPServerRequest from tornado.escape import json_decode from tornado.httputil import parse_body_arguments -import base64 class Middleware: def process_request(self, handler: Any) -> None: # Update type hint @@ -17,27 +16,31 @@ class RequestParsingMiddleware(Middleware): parse the body content according to the specified Content-Type. It supports multiple formats, including: - - JSON: Parses the body as a JSON object when the Content-Type is + - **JSON**: Parses the body as a JSON object when the Content-Type is 'application/json'. The resulting data structure is made accessible via the `parsed_body` attribute of the request handler. - - Form Data: Handles URL-encoded form data when the Content-Type is + - **Form Data**: Handles URL-encoded form data when the Content-Type is 'application/x-www-form-urlencoded'. It converts the body into a dictionary format where each key corresponds to form fields and the values are lists of field values. - - Multipart Data: Processes multipart form data (e.g., file uploads) + - **Multipart Data**: Processes multipart form data (e.g., file uploads) when the Content-Type is 'multipart/form-data'. This is particularly useful for handling file uploads alongside other form fields. The - parsed data will contain both regular arguments and files. + parsed data will contain both regular arguments and files, making it + easy to manage complex data submissions. Attributes: - None + - `raise_on_error` (bool): A flag indicating whether to raise an error + on parsing failures or simply populate the parsed body with `None`. Methods: - process_request(handler): Analyzes the Content-Type of the incoming - request and calls the appropriate parsing method to populate the - `parsed_body` attribute of the request handler. + - `process_request(handler)`: Analyzes the Content-Type of the incoming + request and calls the appropriate parsing method to populate the + `parsed_body` attribute of the request handler. It also handles + unsupported Content-Types by setting a 400 status and finishing the + request with an error message. Example Usage: In a Tornado application, you can use the `RequestParsingMiddleware` @@ -86,15 +89,18 @@ def make_app(): the request body will be available for parsing. """ - def process_request(self, handler: Any) -> None: + def process_request(self, handler: Any) -> None: content_type = handler.request.headers.get("Content-Type", "") - if content_type.startswith("application/json"): + if not handler.request.body: + handler.parsed_body = {} + elif content_type.startswith("application/json"): handler.parsed_body = self._parse_json(handler.request) elif content_type.startswith("application/x-www-form-urlencoded") or content_type.startswith("multipart/form-data"): handler.parsed_body = self._parse_form_or_multipart(handler.request) else: - handler.parsed_body = None + handler.set_status(400) + handler.finish({"error": "Unsupported content type"}) def _parse_json(self, request: HTTPServerRequest) -> Any: try: @@ -106,7 +112,6 @@ def _parse_form_or_multipart(self, request: HTTPServerRequest) -> Dict[str, Any] arguments = {} files = {} - # Use Tornado's built-in function to parse body arguments and files parse_body_arguments( request.headers.get("Content-Type", ""), request.body, @@ -123,13 +128,10 @@ def _parse_form_or_multipart(self, request: HTTPServerRequest) -> Dict[str, Any] "files": { k: [{ "filename": f.filename, - "body": base64.b64encode(f.body).decode('utf-8') if f.body else None, # Encode file body to base64 + "body": f.body, # Keep as raw bytes "content_type": f.content_type } for f in file_list] for k, file_list in files.items() } } - return parsed_data - - - + return parsed_data \ No newline at end of file From 624109e0a43e5f9ffa83cce53f37c41dd5a18d2a Mon Sep 17 00:00:00 2001 From: rifatul islam Date: Fri, 18 Oct 2024 11:15:38 -0400 Subject: [PATCH 4/4] Async added --- tornado/test/test_webmiddleware.py | 10 ++-- tornado/webmiddleware.py | 74 +++++++++++++++--------------- 2 files changed, 43 insertions(+), 41 deletions(-) diff --git a/tornado/test/test_webmiddleware.py b/tornado/test/test_webmiddleware.py index 78f7b5bc02..08caec2460 100644 --- a/tornado/test/test_webmiddleware.py +++ b/tornado/test/test_webmiddleware.py @@ -5,16 +5,16 @@ from tornado.webmiddleware import RequestParsingMiddleware class TestHandler(RequestHandler): - def prepare(self): + async def prepare(self): self.parsed_body = None - self._apply_middlewares() + await self._apply_middlewares() # Await the middleware - def _apply_middlewares(self): + async def _apply_middlewares(self): middlewares = [RequestParsingMiddleware()] for middleware in middlewares: - middleware.process_request(self) + await middleware.process_request(self) # Await the middleware - def post(self): + async def post(self): # Convert any bytes in parsed_body to base64 before responding if isinstance(self.parsed_body, dict): for file_key, files in self.parsed_body.get("files", {}).items(): diff --git a/tornado/webmiddleware.py b/tornado/webmiddleware.py index 8345279bb1..184e5e06d1 100644 --- a/tornado/webmiddleware.py +++ b/tornado/webmiddleware.py @@ -5,7 +5,7 @@ from tornado.httputil import parse_body_arguments class Middleware: - def process_request(self, handler: Any) -> None: # Update type hint + async def process_request(self, handler: Any) -> None: raise NotImplementedError class RequestParsingMiddleware(Middleware): @@ -48,35 +48,37 @@ class RequestParsingMiddleware(Middleware): example implementation: ```python - import tornado.ioloop - import tornado.web - import json - from tornado.webmiddleware import RequestParsingMiddleware - - class MainHandler(tornado.web.RequestHandler): - def prepare(self): - self.parsed_body = None - self._apply_middlewares() - - def _apply_middlewares(self): - middlewares = [RequestParsingMiddleware()] - for middleware in middlewares: - middleware.process_request(self) - - def post(self): - # Respond with the parsed body as JSON - self.set_header("Content-Type", "application/json") - self.write(json.dumps(self.parsed_body)) - - def make_app(): - return tornado.web.Application([ - (r"/", MainHandler), - ]) - - if __name__ == "__main__": - app = make_app() - app.listen(8888) - tornado.ioloop.IOLoop.current().start() + import tornado.ioloop + import tornado.web + import json + from tornado.webmiddleware import RequestParsingMiddleware + + class MainHandler(tornado.web.RequestHandler): + async def prepare(self): + self.parsed_body = None + await self._apply_middlewares() # Await middleware processing + + async def _apply_middlewares(self): + middlewares = [RequestParsingMiddleware()] + for middleware in middlewares: + await middleware.process_request(self) # Await middleware + + async def post(self): + # Respond with the parsed body as JSON + self.set_header("Content-Type", "application/json") + self.write(json.dumps(self.parsed_body)) + + def make_app(): + return tornado.web.Application([ + (r"/", MainHandler), + ]) + + if __name__ == "__main__": + app = make_app() + app.listen(8888) + print("Server is running on http://localhost:8888") + tornado.ioloop.IOLoop.current().start() + ``` In this example, the `MainHandler` prepares for requests by applying the @@ -89,29 +91,29 @@ def make_app(): the request body will be available for parsing. """ - - def process_request(self, handler: Any) -> None: + async def process_request(self, handler: Any) -> None: content_type = handler.request.headers.get("Content-Type", "") if not handler.request.body: handler.parsed_body = {} elif content_type.startswith("application/json"): - handler.parsed_body = self._parse_json(handler.request) + handler.parsed_body = await self._parse_json(handler.request) elif content_type.startswith("application/x-www-form-urlencoded") or content_type.startswith("multipart/form-data"): - handler.parsed_body = self._parse_form_or_multipart(handler.request) + handler.parsed_body = await self._parse_form_or_multipart(handler.request) else: handler.set_status(400) handler.finish({"error": "Unsupported content type"}) - def _parse_json(self, request: HTTPServerRequest) -> Any: + async def _parse_json(self, request: HTTPServerRequest) -> Any: try: return json_decode(request.body) except json.JSONDecodeError: return None - def _parse_form_or_multipart(self, request: HTTPServerRequest) -> Dict[str, Any]: + async def _parse_form_or_multipart(self, request: HTTPServerRequest) -> Dict[str, Any]: arguments = {} files = {} + # Use Tornado's built-in function to parse body arguments and files parse_body_arguments( request.headers.get("Content-Type", ""), request.body,