diff --git a/archive_query_log/cli/parsers.py b/archive_query_log/cli/parsers.py index ca60342..9b6d8fb 100644 --- a/archive_query_log/cli/parsers.py +++ b/archive_query_log/cli/parsers.py @@ -399,13 +399,13 @@ def warc_direct_answers() -> None: type=Choice(CHOICES_WARC_DIRECT_ANSWERS_PARSER_TYPE), required=True) @option("--xpath", type=str) @option("--url-xpath", type=str) -@option("--title-xpath", type=str) @option("--text-xpath", type=str) @pass_config def warc_direct_answers_add( config: Config, provider_id: str | None, url_pattern_regex: str | None, + priority: float | None, parser_type: str, xpath: str | None, url_xpath: str | None, @@ -425,6 +425,7 @@ def warc_direct_answers_add( config=config, provider_id=provider_id, url_pattern_regex=url_pattern_regex, + priority=priority, parser_type=parser_type_strict, xpath=xpath, url_xpath=url_xpath, diff --git a/archive_query_log/imports/yaml.py b/archive_query_log/imports/yaml.py index caceacc..da4d9c6 100644 --- a/archive_query_log/imports/yaml.py +++ b/archive_query_log/imports/yaml.py @@ -500,6 +500,7 @@ def import_warc_direct_answers_parsers(config: Config, services_path: Path) -> N continue results_parsers = service["results_parsers"] + num_results_parsers = len(results_parsers) providers = ( Provider.search(using=config.es.client) @@ -508,7 +509,7 @@ def import_warc_direct_answers_parsers(config: Config, services_path: Path) -> N ) providers = safe_iter_scan(providers) for provider in providers: - for results_parser in enumerate(results_parsers): + for k, results_parser in enumerate(results_parsers): if results_parser["type"] != "html_selector": continue results_selector = results_parser["results_selector"] @@ -546,6 +547,7 @@ def import_warc_direct_answers_parsers(config: Config, services_path: Path) -> N config=config, provider_id=provider.meta.id, url_pattern_regex=results_parser.get("url_pattern"), + priority=num_results_parsers - k, parser_type="xpath", xpath=results_xpath, url_xpath=url_xpath,