diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py index fd12393ad08..d58521b37bb 100644 --- a/changedetectionio/flask_app.py +++ b/changedetectionio/flask_app.py @@ -729,6 +729,12 @@ def edit_page(uuid): for p in datastore.proxy_list: form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label']))) + # Add some HTML to be used for form validation + if datastore.data['watching'][uuid].history.keys(): + timestamp = list(datastore.data['watching'][uuid].history.keys())[-1] + form.last_html_for_form_validation = datastore.data['watching'][uuid].get_fetched_html(timestamp) + else: + form.last_html_for_form_validation = "
" if request.method == 'POST' and form.validate(): diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index b0b19f99090..82591dbfe22 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -1,6 +1,9 @@ import os import re +import elementpath + +from changedetectionio.html_tools import xpath_filter, xpath1_filter from changedetectionio.strtobool import strtobool from wtforms import ( @@ -322,52 +325,39 @@ def __init__(self, message=None, allow_xpath=True, allow_json=True): self.allow_json = allow_json def __call__(self, form, field): - + from lxml.etree import XPathEvalError if isinstance(field.data, str): data = [field.data] else: data = field.data for line in data: - # Nothing to see here - if not len(line.strip()): - return - - # Does it look like XPath? - if line.strip()[0] == '/' or line.strip().startswith('xpath:'): - if not self.allow_xpath: - raise ValidationError("XPath not permitted in this field!") - from lxml import etree, html - import elementpath - # xpath 2.0-3.1 - from elementpath.xpath3 import XPath3Parser - tree = html.fromstring("") - line = line.replace('xpath:', '') + line = line.strip() - try: - elementpath.select(tree, line.strip(), parser=XPath3Parser) - except elementpath.ElementPathError as e: - message = field.gettext('\'%s\' is not a valid XPath expression. (%s)') - raise ValidationError(message % (line, str(e))) - except: - raise ValidationError("A system-error occurred when validating your XPath expression") + if not line: + continue - if line.strip().startswith('xpath1:'): + if line.startswith('xpath') or line.startswith('/'): if not self.allow_xpath: raise ValidationError("XPath not permitted in this field!") - from lxml import etree, html - tree = html.fromstring("") - line = re.sub(r'^xpath1:', '', line) + + if line.startswith('xpath1:'): + filter_function = xpath1_filter + else: + line = line.replace('xpath:', '') + filter_function = xpath_filter try: - tree.xpath(line.strip()) - except etree.XPathEvalError as e: + # Call the determined function + res = filter_function(xpath_filter=line, html_content=form.last_html_for_form_validation) + # It's OK if this is an empty result, we just want to check that it doesn't crash the parser + except (elementpath.ElementPathError,XPathEvalError) as e: message = field.gettext('\'%s\' is not a valid XPath expression. (%s)') raise ValidationError(message % (line, str(e))) - except: + except Exception as e: raise ValidationError("A system-error occurred when validating your XPath expression") - if 'json:' in line: + elif 'json:' in line: if not self.allow_json: raise ValidationError("JSONPath not permitted in this field!") @@ -392,7 +382,7 @@ def __call__(self, form, field): if not self.allow_json: raise ValidationError("jq not permitted in this field!") - if 'jq:' in line: + elif line.startswith('jq:'): try: import jq except ModuleNotFoundError: diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py index bd5fdb8f496..456bdcfbc9e 100644 --- a/changedetectionio/html_tools.py +++ b/changedetectionio/html_tools.py @@ -8,6 +8,7 @@ import json import re +from loguru import logger # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis TEXT_FILTER_LIST_LINE_SUFFIX = "