Skip to content

Commit

Permalink
added priority
Browse files (browse the repository at this point in the history)
  • Loading branch information
JKlueber committed Apr 11, 2024
1 parent a7af589 commit 9e62837
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
3 changes: 2 additions & 1 deletion archive_query_log/cli/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,13 +399,13 @@ def warc_direct_answers() -> None:
type=Choice(CHOICES_WARC_DIRECT_ANSWERS_PARSER_TYPE), required=True)
@option("--xpath", type=str)
@option("--url-xpath", type=str)
@option("--title-xpath", type=str)
@option("--text-xpath", type=str)
@pass_config
def warc_direct_answers_add(
config: Config,
provider_id: str | None,
url_pattern_regex: str | None,
priority: float | None,
parser_type: str,
xpath: str | None,
url_xpath: str | None,
Expand All @@ -425,6 +425,7 @@ def warc_direct_answers_add(
config=config,
provider_id=provider_id,
url_pattern_regex=url_pattern_regex,
priority=priority,
parser_type=parser_type_strict,
xpath=xpath,
url_xpath=url_xpath,
Expand Down
4 changes: 3 additions & 1 deletion archive_query_log/imports/yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,7 @@ def import_warc_direct_answers_parsers(config: Config, services_path: Path) -> N
continue

results_parsers = service["results_parsers"]
num_results_parsers = len(results_parsers)

providers = (
Provider.search(using=config.es.client)
Expand All @@ -508,7 +509,7 @@ def import_warc_direct_answers_parsers(config: Config, services_path: Path) -> N
)
providers = safe_iter_scan(providers)
for provider in providers:
- for results_parser in enumerate(results_parsers):
+ for k, results_parser in enumerate(results_parsers):
if results_parser["type"] != "html_selector":
continue
results_selector = results_parser["results_selector"]
Expand Down Expand Up @@ -546,6 +547,7 @@ def import_warc_direct_answers_parsers(config: Config, services_path: Path) -> N
config=config,
provider_id=provider.meta.id,
url_pattern_regex=results_parser.get("url_pattern"),
priority=num_results_parsers - k,
parser_type="xpath",
xpath=results_xpath,
url_xpath=url_xpath,
Expand Down

0 comments on commit 9e62837

Please sign in to comment.