Skip to content

Commit

Permalink
extract text was not being error checked properly
Browse files Browse the repository at this point in the history
  • Loading branch information
dgtlmoon committed Oct 26, 2023
1 parent 71e5056 commit 528eebd
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 5 deletions.
10 changes: 5 additions & 5 deletions changedetectionio/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
# each select <option data-enabled="enabled-0-0"
from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config

from changedetectionio import content_fetcher
from changedetectionio import content_fetcher, html_tools

from changedetectionio.notification import (
valid_notification_formats,
)
Expand Down Expand Up @@ -284,11 +285,10 @@ def __init__(self, message=None):
def __call__(self, form, field):

for line in field.data:
if line[0] == '/' and line[-1] == '/':
# Because internally we dont wrap in /
line = line.strip('/')
if re.search(html_tools.PERL_STYLE_REGEX, line, re.IGNORECASE):
try:
re.compile(line)
regex = html_tools.perl_style_slash_enclosed_regex_to_options(line)
re.compile(regex)
except re.error:
message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
raise ValidationError(message % (line))
Expand Down
32 changes: 32 additions & 0 deletions changedetectionio/tests/test_extract_regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,35 @@ def test_check_filter_and_regex_extract(client, live_server):

# Should not be here
assert b'Some text that did change' not in res.data



def test_regex_error_handling(client, live_server):
    """Check that a bad 'extract_text' regex is reported by the edit form.

    Steps:
      1. Import a single URL through the import page.
      2. Submit the edit form with a slash-enclosed regex that the form is
         expected to reject.
      3. Assert that the response contains the validation error message.
      4. Delete all watches and confirm the deletion message.
    """

    # NOTE(review): setup call is left disabled as in the original --
    # presumably an earlier test in this module already performed it; confirm.
    #live_server_setup(live_server)

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data

    ### test regex error handling
    # Raw string literal: '\d' is not a recognised str escape, so the value
    # is unchanged, but the raw form avoids the invalid-escape warning.
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"extract_text": r'/something bad\d{3/XYZ',
              "url": test_url,
              "fetch_backend": "html_requests"},
        follow_redirects=True
    )

    # The form should re-render with the validation error rather than save.
    assert b'is not a valid regular expression.' in res.data

    # Clean up so the watch does not linger after this test.
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

0 comments on commit 528eebd

Please sign in to comment.