-
-
Notifications
You must be signed in to change notification settings - Fork 91
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #14 from D4Vinci/dev
v0.2.5
- Loading branch information
Showing
12 changed files
with
215 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ | |
from scrapling.core.custom_types import TextHandler, AttributesHandler | ||
|
||
__author__ = "Karim Shoair ([email protected])" | ||
__version__ = "0.2.4" | ||
__version__ = "0.2.5" | ||
__copyright__ = "Copyright (c) 2024 Karim Shoair" | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[metadata] | ||
name = scrapling | ||
version = 0.2.4 | ||
version = 0.2.5 | ||
author = Karim Shoair | ||
author_email = [email protected] | ||
description = Scrapling is an undetectable, powerful, flexible, adaptive, and high-performance web scraping library for Python. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
import unittest | ||
|
||
from scrapling.engines.toolbelt.custom import ResponseEncoding, StatusText | ||
|
||
|
||
class TestPlayWrightFetcher(unittest.TestCase):
    """Table-driven checks for the ``ResponseEncoding`` and ``StatusText`` helpers.

    Despite the class name, nothing here drives a browser or a fetcher: the
    tests only call ``ResponseEncoding.get_value`` and ``StatusText.get`` and
    compare the results against the lookup tables built in ``setUp``.
    """

    def setUp(self):
        """Build the expectation tables used by the tests below."""
        self.content_type_map = {
            # A map generated by ChatGPT for most possible `content_type` values and the expected outcome
            'text/html; charset=UTF-8': 'UTF-8',
            'text/html; charset=ISO-8859-1': 'ISO-8859-1',
            'text/html': 'ISO-8859-1',
            'application/json; charset=UTF-8': 'UTF-8',
            'application/json': 'utf-8',
            'text/json': 'utf-8',
            'application/javascript; charset=UTF-8': 'UTF-8',
            'application/javascript': 'utf-8',
            'text/plain; charset=UTF-8': 'UTF-8',
            'text/plain; charset=ISO-8859-1': 'ISO-8859-1',
            'text/plain': 'ISO-8859-1',
            'application/xhtml+xml; charset=UTF-8': 'UTF-8',
            'application/xhtml+xml': 'utf-8',
            'text/html; charset=windows-1252': 'windows-1252',
            'application/json; charset=windows-1252': 'windows-1252',
            'text/plain; charset=windows-1252': 'windows-1252',
            'text/html; charset="UTF-8"': 'UTF-8',
            'text/html; charset="ISO-8859-1"': 'ISO-8859-1',
            'text/html; charset="windows-1252"': 'windows-1252',
            'application/json; charset="UTF-8"': 'UTF-8',
            'application/json; charset="ISO-8859-1"': 'ISO-8859-1',
            'application/json; charset="windows-1252"': 'windows-1252',
            'text/json; charset="UTF-8"': 'UTF-8',
            'application/javascript; charset="UTF-8"': 'UTF-8',
            'application/javascript; charset="ISO-8859-1"': 'ISO-8859-1',
            'text/plain; charset="UTF-8"': 'UTF-8',
            'text/plain; charset="ISO-8859-1"': 'ISO-8859-1',
            'text/plain; charset="windows-1252"': 'windows-1252',
            'application/xhtml+xml; charset="UTF-8"': 'UTF-8',
            'application/xhtml+xml; charset="ISO-8859-1"': 'ISO-8859-1',
            'application/xhtml+xml; charset="windows-1252"': 'windows-1252',
            'text/html; charset="US-ASCII"': 'US-ASCII',
            'application/json; charset="US-ASCII"': 'US-ASCII',
            'text/plain; charset="US-ASCII"': 'US-ASCII',
            'text/html; charset="Shift_JIS"': 'Shift_JIS',
            'application/json; charset="Shift_JIS"': 'Shift_JIS',
            'text/plain; charset="Shift_JIS"': 'Shift_JIS',
            'application/xml; charset="UTF-8"': 'UTF-8',
            'application/xml; charset="ISO-8859-1"': 'ISO-8859-1',
            'application/xml': 'utf-8',
            'text/xml; charset="UTF-8"': 'UTF-8',
            'text/xml; charset="ISO-8859-1"': 'ISO-8859-1',
            'text/xml': 'utf-8',
        }
        # HTTP status code -> reason phrase expected from `StatusText.get`.
        self.status_map = {
            100: "Continue",
            101: "Switching Protocols",
            102: "Processing",
            103: "Early Hints",
            200: "OK",
            201: "Created",
            202: "Accepted",
            203: "Non-Authoritative Information",
            204: "No Content",
            205: "Reset Content",
            206: "Partial Content",
            207: "Multi-Status",
            208: "Already Reported",
            226: "IM Used",
            300: "Multiple Choices",
            301: "Moved Permanently",
            302: "Found",
            303: "See Other",
            304: "Not Modified",
            305: "Use Proxy",
            307: "Temporary Redirect",
            308: "Permanent Redirect",
            400: "Bad Request",
            401: "Unauthorized",
            402: "Payment Required",
            403: "Forbidden",
            404: "Not Found",
            405: "Method Not Allowed",
            406: "Not Acceptable",
            407: "Proxy Authentication Required",
            408: "Request Timeout",
            409: "Conflict",
            410: "Gone",
            411: "Length Required",
            412: "Precondition Failed",
            413: "Payload Too Large",
            414: "URI Too Long",
            415: "Unsupported Media Type",
            416: "Range Not Satisfiable",
            417: "Expectation Failed",
            418: "I'm a teapot",
            421: "Misdirected Request",
            422: "Unprocessable Entity",
            423: "Locked",
            424: "Failed Dependency",
            425: "Too Early",
            426: "Upgrade Required",
            428: "Precondition Required",
            429: "Too Many Requests",
            431: "Request Header Fields Too Large",
            451: "Unavailable For Legal Reasons",
            500: "Internal Server Error",
            501: "Not Implemented",
            502: "Bad Gateway",
            503: "Service Unavailable",
            504: "Gateway Timeout",
            505: "HTTP Version Not Supported",
            506: "Variant Also Negotiates",
            507: "Insufficient Storage",
            508: "Loop Detected",
            510: "Not Extended",
            511: "Network Authentication Required",
        }

    def test_parsing_content_type(self):
        """Test if parsing different types of content-type returns the expected result"""
        for header_value, expected_encoding in self.content_type_map.items():
            # subTest keeps the loop going after a mismatch and names the
            # offending header in the report instead of stopping at the first one.
            with self.subTest(content_type=header_value):
                self.assertEqual(ResponseEncoding.get_value(header_value), expected_encoding)

    def test_parsing_response_status(self):
        """Test if using different http responses' status codes returns the expected result"""
        for status_code, expected_status_text in self.status_map.items():
            # One sub-test per status code so every wrong phrase is reported.
            with self.subTest(status_code=status_code):
                self.assertEqual(StatusText.get(status_code), expected_status_text)

        # A code that is absent from the table must map to the fallback text.
        self.assertEqual(StatusText.get(1000), "Unknown Status Code")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,3 +5,4 @@ camoufox | |
werkzeug<3.0.0 | ||
pytest-httpbin==2.1.0 | ||
httpbin~=0.10.0 | ||
pytest-xdist |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters