diff --git a/.github/workflows/end-to-end-test-macos.yml b/.github/workflows/end-to-end-test-macos.yml index c78f819d..7d396810 100644 --- a/.github/workflows/end-to-end-test-macos.yml +++ b/.github/workflows/end-to-end-test-macos.yml @@ -16,59 +16,90 @@ jobs: os: [macos-12] steps: - - name: Checkout the QLever script + - name: Checkout the repository for the qlever script uses: actions/checkout@v3 with: path: qlever-control - - name: Install dependencies for the QLever script + - name: Install qlever script locally + dependencies needed for testing + working-directory: ${{github.workspace}}/qlever-control run: | + python3 -m pip install --upgrade pip setuptools wheel + python3 --version + pip3 --version + pip3 show setuptools wheel + pip3 install -e . + brew install unzip expect + pip3 install flake8 brew install llvm@16 brew install conan@2 - brew install unzip expect - pip3 install flake8 psutil termcolor - - name: Format and compile check - working-directory: ${{github.workspace}}/qlever-control + - name: Show version of built-in vs. installed clang run: | - flake8 qlever - python3 -m py_compile qlever + echo "Version of built-in clang:" + clang++ --version + echo "Version of installed clang:" + export PATH="/usr/local/opt/llvm@16/bin:$PATH" + clang++ --version - - name: Cache for conan (to make the next step much faster) + - name: Cache for conan modules uses: actions/cache@v3 env: - cache-name: cache-conan-modules + cache-name: conan-modules with: path: ~/.conan2 - key: ${{runner.os}}-build-${{env.cache-name}}-${{hashFiles('conanfile.txt')}} + key: ${{matrix.os}}-${{env.cache-name}}-2 - - name: Download QLever binaries and install dependencies + - name: Cache for qlever code and binaries + uses: actions/cache@v3 + env: + cache-name: qlever-code + with: + path: ${{github.workspace}}/qlever-code + key: ${{matrix.os}}-${{env.cache-name}} + + - name: Update qlever repository (clone if not cached) + run: | + if [ ! 
-d qlever-code ]; then + git clone https://github.com/ad-freiburg/qlever.git qlever-code; fi + cd qlever-code + git pull + + - name: Install dependencies using conan (takes long if not cached) + working-directory: ${{github.workspace}}/qlever-code run: | - git clone https://github.com/ad-freiburg/qlever.git qlever-code - mkdir -p qlever-code/build && cd $_ - conan install .. -pr:b=../conanprofiles/clang-16-macos -pr:h=../conanprofiles/clang-16-macos -of=. --build=missing - cd ../.. - mkdir qlever-binaries && cd $_ - wget -q https://ad-research.cs.uni-freiburg.de/downloads/qlever/macos-12/ServerMain - wget -q https://ad-research.cs.uni-freiburg.de/downloads/qlever/macos-12/IndexBuilderMain - chmod 755 ServerMain IndexBuilderMain + mkdir -p build && cd $_ + conan install .. -pr:b=../conanprofiles/clang-16-macos -pr:h=../conanprofiles/clang-16-macos -of=. --build=missing; - - name: Check that everything is found and runs + - name: Build qlever binaries + working-directory: ${{github.workspace}}/qlever-code run: | - pwd && ls -lh - export PATH="$PATH:$(pwd)/qlever-control:$(pwd)/qlever-binaries" + export PATH="/usr/local/opt/llvm@16/bin:$PATH" + export LDFLAGS="-L/usr/local/opt/llvm@16/lib -L/usr/local/opt/llvm@16/lib/c++ -Wl,-rpath,/usr/local/opt/llvm@16/lib/c++" + export CPPFLAGS="-I/usr/local/opt/llvm@16/include" + cmake -B build -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE="$(pwd)/build/conan_toolchain.cmake" -DUSE_PARALLEL=true -DRUN_EXPENSIVE_TESTS=false -DENABLE_EXPENSIVE_CHECKS=true -DCMAKE_CXX_COMPILER=clang++ -DADDITIONAL_COMPILER_FLAGS="-fexperimental-library" -DADDITIONAL_LINKER_FLAGS="-L$(brew --prefix llvm)/lib/c++" + source build/conanrun.sh + make -C build ServerMain IndexBuilderMain + + - name: Check that qlever binaries and qlever script are found and work + run: | + export PATH="$PATH:$(pwd)/qlever-code/build" source qlever-code/build/conanrun.sh - ServerMain --help > /dev/null - IndexBuilderMain --help > /dev/null - qlever - qlever help + 
ServerMain --help | head -3; echo "..." + IndexBuilderMain --help | head -3; echo "..." + qlever --help - - name: Test actions for olympics dataset, without Docker + - name: Test qlever script for olympics dataset, without Docker + timeout-minutes: 3 run: | - export PATH="$PATH:$(pwd)/qlever-control:$(pwd)/qlever-binaries" - mkdir -p ${{github.workspace}}/qlever-indices/olympics && cd $_ - source ${{github.workspace}}/qlever-code/build/conanrun.sh + export PATH="$PATH:$(pwd)/qlever-code/build" + source qlever-code/build/conanrun.sh + export QLEVER_ARGCOMPLETE_ENABLED=1 + mkdir -p qlever-indices/olympics && cd $_ qlever setup-config olympics - qlever get-data docker.USE_DOCKER=false index index-stats - qlever docker.USE_DOCKER=false start status test-query stop restart test-query stop status start test-query status stop + qlever get-data + qlever index --system native + qlever start --system native + qlever status + qlever stop ls -lh diff --git a/.github/workflows/end-to-end-test-ubuntu.yml b/.github/workflows/end-to-end-test-ubuntu.yml index c2d32e2f..67ee5a31 100644 --- a/.github/workflows/end-to-end-test-ubuntu.yml +++ b/.github/workflows/end-to-end-test-ubuntu.yml @@ -16,73 +16,92 @@ jobs: os: [ubuntu-22.04] steps: - - name: Checkout the QLever script + - name: Checkout the repository for the qlever script uses: actions/checkout@v3 with: path: qlever-control - - name: Install dependencies for the QLever script - run: | - sudo apt update - sudo apt install unzip flake8 expect - pip3 install psutil termcolor - - - name: Format and compile check + - name: Install qlever script locally + dependencies needed for testing working-directory: ${{github.workspace}}/qlever-control run: | - flake8 qlever - python3 -m py_compile qlever + python3 -m pip install --upgrade pip setuptools wheel + python3 --version + pip3 --version + pip3 show setuptools wheel + pip3 install -e . 
+ sudo apt update && sudo apt install unzip expect - - name: Download QLever binaries and docker image and install dependencies + - name: Cache for qlever code and binaries + uses: actions/cache@v3 + env: + cache-name: qlever-code + with: + path: ${{github.workspace}}/qlever-code + key: ${{matrix.os}}-${{env.cache-name}} + + - name: Build qlever binaries from source + download docker image run: | - # Download Docker image. + sudo apt update + sudo apt install -y software-properties-common + sudo add-apt-repository -y ppa:mhier/libboost-latest + sudo apt install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev + if [ ! -d qlever-code ]; then + git clone https://github.com/ad-freiburg/qlever.git qlever-code; fi + cd qlever-code + git pull + mkdir -p build && cd $_ + cmake -DCMAKE_BUILD_TYPE=Release -DLOGLEVEL=INFO -DUSE_PARALLEL=true -GNinja .. + ninja ServerMain IndexBuilderMain docker pull adfreiburg/qlever - # Download QLever binaries. - mkdir qlever-binaries && cd $_ - wget -q https://ad-research.cs.uni-freiburg.de/downloads/qlever/ubuntu-22.04/ServerMain - wget -q https://ad-research.cs.uni-freiburg.de/downloads/qlever/ubuntu-22.04/IndexBuilderMain - chmod 755 ServerMain IndexBuilderMain - # Install depencies needed for running the binaries. 
- curl -Gs https://raw.githubusercontent.com/ad-freiburg/qlever/master/Dockerfile | sed -En 's/(add-apt|apt|tee)/sudo \1/g; s/^RUN //p' | sed '/^cmake/q' | sed -E 's/^(cmake.*)/mkdir -p build \&\& cd build\n\1\ncd ../' | sed -n '/^sudo/p' > INSTALL - cat INSTALL - source ./INSTALL - - name: Check that everything is found and runs + - name: Check that qlever binaries, docker image, and qlever script are found and work run: | mkdir qlever-indices - pwd && ls -lh - export PATH="$PATH:$(pwd)/qlever-control:$(pwd)/qlever-binaries" - docker run --entrypoint bash adfreiburg/qlever -c "ServerMain --help" > /dev/null - docker run --entrypoint bash adfreiburg/qlever -c "IndexBuilderMain --help" > /dev/null - ServerMain --help > /dev/null - IndexBuilderMain --help > /dev/null - qlever - qlever help + export PATH="$PATH:$(pwd)/qlever-control:$(pwd)/qlever-code/build" + docker run --entrypoint bash adfreiburg/qlever -c "ServerMain --help" | head -3 + docker run --entrypoint bash adfreiburg/qlever -c "IndexBuilderMain --help" | head -3 + ServerMain --help | head -3; echo "..." + IndexBuilderMain --help | head -3; echo "..." 
+ qlever --help - - name: Test actions for olympics dataset, with Docker + - name: Test qlever script for olympics dataset, with docker + timeout-minutes: 1 run: | - export PATH="$PATH:$(pwd)/qlever-control:$(pwd)/qlever-binaries" + export PATH="$PATH:$(pwd)/qlever-code/build" + export QLEVER_ARGCOMPLETE_ENABLED=1 mkdir -p ${{github.workspace}}/qlever-indices/olympics.1 && cd $_ qlever setup-config olympics - unbuffer qlever get-data docker.USE_DOCKER=true index index-stats start status test-query restart status test-query stop status start stop start test-query stop status + qlever get-data + unbuffer qlever index + unbuffer qlever start + qlever status + qlever stop ls -lh - - name: Test actions for olympics dataset, without Docker + - name: Test qlever script for olympics dataset, without docker + timeout-minutes: 1 run: | - export PATH="$PATH:$(pwd)/qlever-control:$(pwd)/qlever-binaries" + export PATH="$PATH:$(pwd)/qlever-code/build" + export QLEVER_ARGCOMPLETE_ENABLED=1 mkdir -p ${{github.workspace}}/qlever-indices/olympics.2 && cd $_ qlever setup-config olympics - qlever get-data docker.USE_DOCKER=false index index-stats start status test-query restart status test-query stop status start stop start test-query stop status + qlever get-data + qlever index --system native + qlever start --system native + qlever status + qlever stop ls -lh - - name: Test actions for olympics dataset, with and without Docker + - name: Test qlever script for olympics dataset, with and without docker + timeout-minutes: 1 run: | - export PATH="$PATH:$(pwd)/qlever-control:$(pwd)/qlever-binaries" + export PATH="$PATH:$(pwd)/qlever-code/build" + export QLEVER_ARGCOMPLETE_ENABLED=1 mkdir -p ${{github.workspace}}/qlever-indices/olympics.3 && cd $_ qlever setup-config olympics - unbuffer qlever get-data index index-stats - qlever docker.USE_DOCKER=false start status test-query - unbuffer qlever docker.USE_DOCKER=true stop status - unbuffer qlever docker.USE_DOCKER=true restart status 
test-query - qlever docker.USE_DOCKER=false stop status + qlever get-data + unbuffer qlever index + qlever start --system native + qlever status + qlever stop ls -lh diff --git a/.github/workflows/format-check.yml b/.github/workflows/format-check.yml new file mode 100644 index 00000000..4c4be1d5 --- /dev/null +++ b/.github/workflows/format-check.yml @@ -0,0 +1,32 @@ +name: Format check + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + merge_group: + +jobs: + format-check: + runs-on: ${{matrix.os}} + strategy: + fail-fast: true + matrix: + os: [ubuntu-22.04] + + steps: + - name: Checkout the repository for the qlever script + uses: actions/checkout@v3 + with: + path: qlever-control + + - name: Install dependencies needed for checking + run: | + sudo apt update && sudo apt install flake8 + pip install isort + + - name: Check format, compilation, and imports + working-directory: ${{github.workspace}}/qlever-control + run: | + for PY in $(find src test -name "*.py"); do printf "$PY ... "; flake8 $PY && python3 -m py_compile $PY && isort -c $PY && echo "OK"; done diff --git a/pyproject.toml b/pyproject.toml index 6ba0e7d0..b89b6c5f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,31 +1,37 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + [project] name = "qlever" description = "Script for using the QLever SPARQL engine." 
-version = "0.4.0" +version = "0.5.0" authors = [ { name = "Hannah Bast", email = "bast@cs.uni-freiburg.de" } ] readme = "README.md" -license = { file = "LICENSE" } +license = { text = "Apache-2.0" } requires-python = ">=3.8" -keywords = ["SPARQL", "RDF", "knowledge graphs", "triple store"] +keywords = ["SPARQL", "RDF", "Knowledge Graphs", "Triple Store"] classifiers = [ "Topic :: Database :: Database Engines/Servers", "Topic :: Database :: Front-Ends" ] -dependencies = [ "psutil", "termcolor" ] +dependencies = [ "psutil", "termcolor", "argcomplete" ] [project.urls] -Github = "https://github.com/ad-freiburg/qlever-control" +Github = "https://github.com/ad-freiburg/qlever" [project.scripts] "qlever" = "qlever.qlever_main:main" -"qlever-old" = "qlever.__main__:main" +"qlever-old" = "qlever.qlever_old:main" [tool.setuptools] -package-dir = { "" = "src" } -packages = [ "qlever", "qlever.commands", "qlever.Qleverfiles" ] -# package-data = { "qlever" = ["Qleverfiles/*"] } +license-files = ["LICENSE"] +package-data = { "qlever" = ["Qleverfiles/*"] } + +[tool.pytest.ini_options] +pythonpath = ["src"] diff --git a/src/qlever/Qleverfiles/Qleverfile.ohm-planet b/src/qlever/Qleverfiles/Qleverfile.ohm-planet new file mode 100644 index 00000000..dce4f24b --- /dev/null +++ b/src/qlever/Qleverfiles/Qleverfile.ohm-planet @@ -0,0 +1,37 @@ +# Qleverfile for OHM Planet, use with https://github.com/ad-freiburg/qlever-control +# +# qlever get-data # ~20 mins (download PBF, convert to TTL, add GeoSPARQL triples) +# qlever index # ~10 mins and ~5 GB RAM (on an AMD Ryzen 9 5900X) +# qlever start # start the server (instantaneous) +# +# For `qlever get-data` to work, `osm2rdf` and `spatialjoin` must be installed +# and included in the `PATH`. 
+ +[data] +NAME = ohm-planet +GET_DATA_URL = $$(curl -s https://planet.openhistoricalmap.org/planet/state.txt) +GET_DATA_CMD_1 = curl -LRfC - -o ${NAME}.pbf ${GET_DATA_URL} 2>&1 | tee ${NAME}.download-log.txt +GET_DATA_CMD_2 = osm2rdf ${NAME}.pbf -o ${NAME}.ttl --source-dataset OHM --simplify-wkt 0 --write-ogc-geo-triples none 2>&1 | tee ${NAME}.osm2rdf-log.txt +GET_DATA_CMD_3 = bzcat ${NAME}.ttl.bz2 | \grep "^osm2rdf" | sed -En 's/^osm2rdf(geom)?:(ohm_)?(node|rel|way)[a-z]*_([0-9]+) geo:asWKT "([^\"]+)".*/ohm\3:\4\t\5/p' | tee ${NAME}.spatialjoin-input.tsv | spatialjoin --contains ' ogc:sfContains ' --intersects ' ogc:sfIntersects ' --suffix $$' .\n' -o ${NAME}.spatialjoin-triples.ttl.bz2 2>&1 | tee ${NAME}.spatialjoin-log.txt && rm -f areas events lines points simplelines +GET_DATA_CMD = ${GET_DATA_CMD_1} && echo && ${GET_DATA_CMD_2} && echo && ${GET_DATA_CMD_3} && head -100 <(bzcat ${NAME}.ttl.bz2) | sed '/^@prefix/!d' > ${NAME}.prefix-definitions +VERSION = $$(date -r ${NAME}.pbf +%d.%m.%Y || echo "NO_DATE") +DESCRIPTION = OHM Planet, data from ${GET_DATA_URL} version ${VERSION} (with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects) + +[index] +INPUT_FILES = ${data:NAME}.prefix-definitions ${data:NAME}.spatialjoin-triples.ttl.bz2 ${data:NAME}.ttl.bz2 +CAT_INPUT_FILES = bzcat -f ${INPUT_FILES} +SETTINGS_JSON = { "prefixes-external": [""], "ascii-prefixes-only": false, "parallel-parsing": true, "num-triples-per-batch": 5000000 } + +[server] +PORT = 7037 +ACCESS_TOKEN = ${data:NAME}_32673264324 +MEMORY_FOR_QUERIES = 10G +CACHE_MAX_SIZE = 5G +WARMUP_CMD = curl -s http://localhost:${PORT} -H "Accept: application/qlever-results+json" --data-urlencode "query=PREFIX geo: SELECT ?subject ?geometry WHERE { ?subject geo:hasGeometry ?m . 
?m geo:asWKT ?geometry } INTERNAL SORT BY ?subject" --data-urlencode "access-token=${server:ACCESS_TOKEN}" --data-urlencode "pinresult=true" --data-urlencode "send=0" | jq .resultsize | xargs printf "Result size: %'d\n" + + +[runtime] +SYSTEM = native + +[ui] +CONFIG = ohm-planet diff --git a/src/qlever/__init__.py b/src/qlever/__init__.py index 67c90f14..17b17535 100644 --- a/src/qlever/__init__.py +++ b/src/qlever/__init__.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import sys from pathlib import Path diff --git a/src/qlever/commands/example_queries.py b/src/qlever/commands/example_queries.py index 5af33865..e4511310 100644 --- a/src/qlever/commands/example_queries.py +++ b/src/qlever/commands/example_queries.py @@ -4,13 +4,15 @@ import shlex import subprocess import time +import traceback +from pathlib import Path from termcolor import colored from qlever.command import QleverCommand from qlever.commands.clear_cache import ClearCacheCommand from qlever.log import log, mute_log -from qlever.util import run_command +from qlever.util import run_command, run_curl_command class ExampleQueriesCommand(QleverCommand): @@ -57,12 +59,27 @@ def additional_arguments(self, subparser) -> None: "or just compute the size of the result") subparser.add_argument("--limit", type=int, help="Limit on the number of results") + subparser.add_argument("--accept", type=str, + choices=["text/tab-separated-values", + "application/sparql-results+json"], + default="text/tab-separated-values", + help="Accept header for the SPARQL query") subparser.add_argument("--clear-cache", choices=["yes", "no"], default="yes", help="Clear the cache before each query") def execute(self, args) -> bool: + # If `args.accept` is `application/sparql-results+json`, we need `jq`. 
+ if args.accept == "application/sparql-results+json": + try: + subprocess.run("jq --version", shell=True, check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) + except Exception as e: + log.error(f"Please install `jq` for {args.accept} ({e})") + return False + # Handle shotcuts for SPARQL endpoint. if args.sparql_endpoint_preset in self.presets: args.sparql_endpoint = self.presets[args.sparql_endpoint_preset] @@ -92,6 +109,7 @@ def execute(self, args) -> bool: else f"localhost:{args.port}") self.show(f"Obtain queries via: {get_queries_cmd}\n" f"SPARQL endpoint: {sparql_endpoint}\n" + f"Accept header: {args.accept}\n" f"Clear cache before each query:" f" {args.clear_cache.upper()}\n" f"Download result for each query or just count:" @@ -103,7 +121,8 @@ def execute(self, args) -> bool: # Get the example queries. try: - example_query_lines = run_command(get_queries_cmd, return_output=True) + example_query_lines = run_command(get_queries_cmd, + return_output=True) if len(example_query_lines) == 0: log.error("No example queries matching the criteria found") return False @@ -114,9 +133,10 @@ def execute(self, args) -> bool: # Launch the queries one after the other and for each print: the # description, the result size, and the query processing time. - count = 0 total_time_seconds = 0.0 total_result_size = 0 + count_succeeded = 0 + count_failed = 0 for example_query_line in example_query_lines: # Parse description and query. description, query = example_query_line.split("\t") @@ -155,44 +175,93 @@ def execute(self, args) -> bool: + f" }} LIMIT {args.limit}" # Launch query. 
- query_cmd = (f"curl -sv {sparql_endpoint}" - f" -H \"Accept: text/tab-separated-values\"" - f" --data-urlencode query={shlex.quote(query)}") - if args.download_or_count == "count": - query_cmd += " | sed 1d" - else: - query_cmd += " | sed 1d | wc -l" try: - log.debug(query_cmd) + curl_cmd = (f"curl -s {sparql_endpoint}" + f" -w \"HTTP code: %{{http_code}}\\n\"" + f" -H \"Accept: {args.accept}\"" + f" --data-urlencode query={shlex.quote(query)}") + log.debug(curl_cmd) + result_file = (f"qlever.example_queries.result." + f"{abs(hash(curl_cmd))}.tmp") start_time = time.time() - result_size = run_command(query_cmd, return_output=True) - result_size = int(result_size.strip()) + http_code = run_curl_command(sparql_endpoint, + headers={"Accept": args.accept}, + params={"query": query}, + result_file=result_file).strip() + if http_code != "200": + raise Exception(f"HTTP code {http_code}" + f" {Path(result_file).read_text()}") time_seconds = time.time() - start_time - time_string = f"{time_seconds:.2f}" - result_string = f"{result_size:>14,}" + error_msg = None except Exception as e: - time_seconds = 0.0 - time_string = "---" - result_size = 0 - result_string = colored(f" FAILED {e}", "red") + if args.log_level == "DEBUG": + traceback.print_exc() + error_msg = re.sub(r"\s+", " ", str(e)) + + # Get result size (via the command line, in order to avoid loading + # a potentially large JSON file into Python, which is slow). 
+ if error_msg is None: + try: + if args.download_or_count == "count": + if args.accept == "text/tab-separated-values": + result_size = run_command( + f"sed 1d {result_file}", + return_output=True) + else: + result_size = run_command( + f"jq -r \".results.bindings[0]" + f" | to_entries[0].value.value" + f" | tonumber\" {result_file}", + return_output=True) + else: + if args.accept == "text/tab-separated-values": + result_size = run_command( + f"sed 1d {result_file} | wc -l", + return_output=True) + else: + result_size = run_command( + f"jq -r \".results.bindings | length\"" + f" {result_file}", + return_output=True) + result_size = int(result_size) + except Exception as e: + error_msg = str(e) # Print description, time, result in tabular form. if (len(description) > 60): description = description[:57] + "..." - log.info(f"{description:<60} {time_string:>6} s " - f"{result_string}") - count += 1 - total_time_seconds += time_seconds - total_result_size += result_size + if error_msg is None: + log.info(f"{description:<60} {time_seconds:6.2f} s " + f"{result_size:14,}") + count_succeeded += 1 + total_time_seconds += time_seconds + total_result_size += result_size + else: + count_failed += 1 + if (len(error_msg) > 60) and args.log_level != "DEBUG": + error_msg = error_msg[:57] + "..." + log.error(f"{description:<60} failed " + f"{colored(error_msg, 'red')}") # Print total time. 
log.info("") - description = (f"TOTAL for {count} " - f"{'query' if count == 1 else 'queries'}") - log.info(f"{description:<60} {total_time_seconds:6.2f} s " - f"{total_result_size:>14,}") - description = (f"AVERAGE for {count} " - f"{'query' if count == 1 else 'queries'}") - log.info(f"{description:<60} {total_time_seconds / count:6.2f} s " - f"{round(total_result_size / count):>14,}") + if count_succeeded > 0: + query_or_queries = "query" if count_succeeded == 1 else "queries" + description = (f"TOTAL for {count_succeeded} {query_or_queries}") + log.info(f"{description:<60} " + f"{total_time_seconds:6.2f} s " + f"{total_result_size:>14,}") + description = (f"AVERAGE for {count_succeeded} {query_or_queries}") + log.info(f"{description:<60} " + f"{total_time_seconds / count_succeeded:6.2f} s " + f"{round(total_result_size / count_succeeded):>14,}") + else: + if count_failed == 1: + log.info(colored("One query failed", "red")) + elif count_failed > 1: + log.info(colored("All queries failed", "red")) + + # Return success (has nothing to do with how many queries failed). + if args.log_level != "DEBUG": + Path(result_file).unlink(missing_ok=True) return True diff --git a/src/qlever/commands/get_data.py b/src/qlever/commands/get_data.py index c458efa5..4ae2bb7d 100644 --- a/src/qlever/commands/get_data.py +++ b/src/qlever/commands/get_data.py @@ -1,9 +1,10 @@ +from __future__ import annotations + import shlex -import subprocess from qlever.command import QleverCommand from qlever.log import log -from qlever.util import get_total_file_size +from qlever.util import get_total_file_size, run_command class GetDataCommand(QleverCommand): @@ -34,8 +35,7 @@ def execute(self, args) -> bool: # Execute the command line. 
try: - subprocess.run(args.get_data_cmd, shell=True, check=True, - stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL) + run_command(args.get_data_cmd, show_output=True) except Exception as e: log.error(f"Problem executing \"{args.get_data_cmd}\": {e}") return False diff --git a/src/qlever/commands/index_stats.py b/src/qlever/commands/index_stats.py index 1f7c0c0e..975576ac 100644 --- a/src/qlever/commands/index_stats.py +++ b/src/qlever/commands/index_stats.py @@ -71,14 +71,17 @@ def execute_time(self, args, log_file_name) -> bool: # Helper function that finds the next line matching the given `regex`, # starting from `current_line`, and extracts the time. Returns a tuple - # of the time and the regex match object. If a match is found, - # `current_line` is updated to the line after the match. Otherwise, - # `current_line` will be one beyond the last line, unless - # `line_is_optional` is true, in which case it will be the same as when - # the function was entered. + # of the time and the regex match object. + # + # If `update_current_line` is `False`, then `current_line` will not be + # updated by this call. + # + # Otherwise, and this is the default behavior, `current_line` will be + # updated to the line after the first match, or one beyond the last + # line if no match is found. current_line = 0 - def find_next_line(regex, line_is_optional=False): + def find_next_line(regex, update_current_line=True): nonlocal lines nonlocal current_line current_line_backup = current_line @@ -99,7 +102,7 @@ def find_next_line(regex, line_is_optional=False): f"\"{timestamp_regex}\" from line " f" \"{line.rstrip()}\" ({e})") # If we get here, we did not find a matching line. 
- if line_is_optional: + if not update_current_line: current_line = current_line_backup return None, None @@ -110,24 +113,34 @@ def find_next_line(regex, line_is_optional=False): convert_begin, _ = find_next_line(r"INFO:\s*Converting triples") perm_begin_and_info = [] while True: - perm_begin, _ = find_next_line(r"INFO:\s*Creating a pair", True) + # Find the next line that starts a permutation. + # + # NOTE: Should work for the old and new format of the index log + # file (old format: "Creating a pair" + names of permutations in + # line "Writing meta data for ..."; new format: name of + # permutations already in line "Creating permutations ..."). + perm_begin, _ = find_next_line(r"INFO:\s*Creating a pair", + update_current_line=False) if perm_begin is None: + perm_begin, perm_info = find_next_line( + r"INFO:\s*Creating permutations ([A-Z]+ and [A-Z]+)", + update_current_line=False) + else: + _, perm_info = find_next_line( + r"INFO:\s*Writing meta data for ([A-Z]+ and [A-Z]+)", + update_current_line=False) + if perm_info is None: break - _, perm_info = find_next_line(r"INFO:\s*Writing meta data for" - r" ([A-Z]+ and [A-Z]+)", True) - # if perm_info is None: - # break perm_begin_and_info.append((perm_begin, perm_info)) convert_end = (perm_begin_and_info[0][0] if len(perm_begin_and_info) > 0 else None) normal_end, _ = find_next_line(r"INFO:\s*Index build completed") - text_begin, _ = find_next_line(r"INFO:\s*Adding text index", True) - text_end, _ = find_next_line(r"INFO:\s*Text index build comp", True) + text_begin, _ = find_next_line(r"INFO:\s*Adding text index", + update_current_line=False) + text_end, _ = find_next_line(r"INFO:\s*Text index build comp", + update_current_line=False) if args.ignore_text_index: text_begin = text_end = None - # print("DEBUG:", len(perm_begin_and_info), perm_begin_and_info) - # print("DEBUG:", overall_begin) - # print("DEBUG:", normal_end) # Check whether at least the first phase is done. 
if overall_begin is None: diff --git a/src/qlever/commands/setup_config.py b/src/qlever/commands/setup_config.py index 12afe984..c1140c1b 100644 --- a/src/qlever/commands/setup_config.py +++ b/src/qlever/commands/setup_config.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import subprocess from pathlib import Path diff --git a/src/qlever/config.py b/src/qlever/config.py index 8cc0f154..35e0bb94 100644 --- a/src/qlever/config.py +++ b/src/qlever/config.py @@ -3,9 +3,11 @@ import argparse import os import traceback +from importlib.metadata import version from pathlib import Path import argcomplete +from termcolor import colored from qlever import command_objects, script_name from qlever.log import log, log_levels @@ -114,17 +116,11 @@ def parse_args(self): argcomplete_enabled = os.environ.get("QLEVER_ARGCOMPLETE_ENABLED") if not argcomplete_enabled and not argcomplete_check_off: log.info("") - log.warn(f"Autocompletion is not enabled for this script, run " - f"the following command, and consider adding it to your " - f"`.bashrc` or `.zshrc`:" + log.warn(f"To enable autocompletion, run the following command, " + f"and consider adding it to your `.bashrc` or `.zshrc`:" f"\n\n" f"eval \"$(register-python-argcomplete {script_name})\"" - f" && export QLEVER_ARGCOMPLETE_ENABLED=1" - f"\n\n" - f"If autocompletion does not work for you or you don't " - f"want to use it, disable this warning as follows:" - f"\n\n" - f"export QLEVER_ARGCOMPLETE_CHECK_OFF") + f" && export QLEVER_ARGCOMPLETE_ENABLED=1") log.info("") # Create a temporary parser only to parse the `--qleverfile` option, in @@ -179,7 +175,12 @@ def add_qleverfile_option(parser): # command. We have a dedicated class for each command. These classes # are defined in the modules in `qlever/commands`. In `__init__.py` # an object of each class is created and stored in `command_objects`. 
- parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser( + description=colored("This is the qlever command line tool, " + "it's all you need to work with QLever", + attrs=["bold"])) + parser.add_argument("--version", action="version", + version=f"%(prog)s {version('qlever')}") add_qleverfile_option(parser) subparsers = parser.add_subparsers(dest='command') subparsers.required = True @@ -195,6 +196,11 @@ def add_qleverfile_option(parser): # because it is executed whenever the user triggers the autocompletion. argcomplete.autocomplete(parser, always_complete_options="long") + # If called without arguments, show the help message. + if len(os.sys.argv) == 1: + parser.print_help() + exit(0) + # Parse the command line arguments. args = parser.parse_args() diff --git a/src/qlever/log.py b/src/qlever/log.py index a741f8ce..0a9358c5 100644 --- a/src/qlever/log.py +++ b/src/qlever/log.py @@ -1,6 +1,4 @@ -# Copyright 2024, University of Freiburg, -# Chair of Algorithms and Data Structures -# Author: Hannah Bast +from __future__ import annotations import logging from contextlib import contextmanager diff --git a/src/qlever/qlever_main.py b/src/qlever/qlever_main.py index a4d0dbda..a936abd5 100644 --- a/src/qlever/qlever_main.py +++ b/src/qlever/qlever_main.py @@ -5,6 +5,8 @@ # Chair of Algorithms and Data Structures # Author: Hannah Bast +from __future__ import annotations + import re import traceback diff --git a/src/qlever/__main__.py b/src/qlever/qlever_old.py similarity index 98% rename from src/qlever/__main__.py rename to src/qlever/qlever_old.py index 6be6b3ab..c8b3bd7c 100755 --- a/src/qlever/__main__.py +++ b/src/qlever/qlever_old.py @@ -5,14 +5,11 @@ # convenient command-line tool for all things QLever. See the `README.md` file # for how to use it. 
-from configparser import ConfigParser, ExtendedInterpolation -from datetime import datetime, date -import os import glob import inspect import json import logging -import psutil +import os import re import shlex import shutil @@ -20,16 +17,20 @@ import subprocess import sys import time -import pkg_resources -from termcolor import colored import traceback +from configparser import ConfigParser, ExtendedInterpolation +from datetime import date, datetime + +import pkg_resources +import psutil + +from qlever.log import log BLUE = "\033[34m" RED = "\033[31m" BOLD = "\033[1m" NORMAL = "\033[0m" -from qlever.log import log # # Custom formatter for log messages. # class CustomFormatter(logging.Formatter): # def format(self, record): @@ -42,8 +43,8 @@ # return colored(message, "red") # else: # return message -# -# +# +# # # Custom logger. # log = logging.getLogger("qlever") # log.setLevel(logging.INFO) @@ -94,14 +95,14 @@ def show_available_config_names(): def show_available_action_names(): log.info("You can now execute a sequence of actions, for example:") log.info("") - log.info(f"{BLUE}qlever get-data index restart test-query ui {NORMAL}") + log.info(f"{BLUE}qlever-old get-data index restart test-query ui {NORMAL}") log.info("") log.info(f"Available action names are: {', '.join(action_names)}") log.info("") log.info("To get autocompletion for these, run the following or " "add it to your `.bashrc`:") log.info("") - log.info(f"{BLUE}eval \"$(qlever setup-autocompletion)\"{NORMAL}") + log.info(f"{BLUE}eval \"$(qlever-old setup-autocompletion)\"{NORMAL}") # We want to distinguish between exception that we throw intentionally and all @@ -122,7 +123,7 @@ def __init__(self): log.error("The qlever script needs a \"Qleverfile\" " "in the current directory, but I could not find it") log.info("") - log.info("Run `qlever setup-config ` to create a " + log.info("Run `qlever-old setup-config ` to create a " "pre-filled Qleverfile") log.info("") show_available_config_names() @@ 
-325,9 +326,9 @@ def show(self, action_description, only_show): log.info(f"{BLUE}{action_description}{NORMAL}") log.info("") if only_show: - log.info("You called \"qlever ... show\", therefore the action " - "is only shown, but not executed (omit the \"show\" to " - "execute it)") + log.info("You called \"qlever-old ... show\", therefore the " + "action is only shown, but not executed (omit the " + "\"show\" to execute it)") @staticmethod @track_action_rank @@ -344,8 +345,8 @@ def action_setup_config(config_name="default"): log.error("Qleverfile already exists in current directory") log.info("") log.info("If you want to create a new Qleverfile using " - "`qlever setup-config`, delete the existing Qleverfile " - "first") + "`qlever-old setup-config`, delete the existing " + "Qleverfile first") abort_script() # Get the directory of this script and copy the Qleverfile for `config` @@ -1328,11 +1329,11 @@ def setup_autocompletion_cmd(): # Return multiline string with the command for setting up autocompletion. 
return f"""\ -_qlever_completion() {{ +_qlever_old_completion() {{ local cur=${{COMP_WORDS[COMP_CWORD]}} COMPREPLY=( $(compgen -W "{action_names}" -- $cur) ) }} -complete -o nosort -F _qlever_completion qlever +complete -o nosort -F _qlever_old_completion qlever-old """ @@ -1356,7 +1357,7 @@ def main(): (len(sys.argv) == 2 and sys.argv[1] == "--help") or \ (len(sys.argv) == 2 and sys.argv[1] == "-h"): log.info("") - log.info(f"{BOLD}Hello, I am the qlever script" + log.info(f"{BOLD}Hello, I am the OLD qlever script" f" (version {version}){NORMAL}") log.info("") if os.path.exists("Qleverfile"): @@ -1368,7 +1369,7 @@ def main(): log.info("You need a Qleverfile in the current directory, which " "you can create as follows:") log.info("") - log.info(f"{BLUE}qlever setup-config {NORMAL}") + log.info(f"{BLUE}qlever-old setup-config {NORMAL}") log.info("") show_available_config_names() log.info("") diff --git a/src/qlever/util.py b/src/qlever/util.py index 05bca73a..8c4b1e5e 100644 --- a/src/qlever/util.py +++ b/src/qlever/util.py @@ -1,7 +1,7 @@ from __future__ import annotations -import random import re +import secrets import shlex import shutil import string @@ -31,7 +31,7 @@ def run_command(cmd: str, return_output: bool = False, show_output: bool = False) -> Optional[str]: """ Run the given command and throw an exception if the exit code is non-zero. - If `get_output` is `True`, return what the command wrote to `stdout`. + If `return_output` is `True`, return what the command wrote to `stdout`. NOTE: The `set -o pipefail` ensures that the exit code of the command is non-zero if any part of the pipeline fails (not just the last part). @@ -68,6 +68,45 @@ def run_command(cmd: str, return_output: bool = False, return result.stdout +def run_curl_command(url: str, + headers: dict[str, str] = {}, + params: dict[str, str] = {}, + result_file: Optional[str] = None) -> str: + """ + Run `curl` with the given `url`, `headers`, and `params`. 
If `result_file` + is `None`, return the output, otherwise, write the output to the given file + and return the HTTP code. If the `curl` command fails, throw an exception. + + """ + # Construct and run the `curl` command. + default_result_file = "/tmp/qlever.curl.result" + actual_result_file = result_file if result_file else default_result_file + curl_cmd = (f"curl -s -o \"{actual_result_file}\"" + f" -w \"%{{http_code}}\n\" {url}" + + "".join([f" -H \"{key}: {value}\"" + for key, value in headers.items()]) + + "".join([f" --data-urlencode {key}={shlex.quote(value)}" + for key, value in params.items()])) + result = subprocess.run(curl_cmd, shell=True, text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + # Case 1: An error occurred, raise an exception. + if result.returncode != 0: + if len(result.stderr) > 0: + raise Exception(result.stderr) + else: + raise Exception(f"curl command failed with exit code " + f"{result.returncode}, stderr is empty") + # Case 2: Return output (read from `default_result_file`). + if result_file is None: + result_file_path = Path(default_result_file) + result = result_file_path.read_text() + result_file_path.unlink() + return result + # Case 3: Return HTTP code. + return result.stdout + + def is_qlever_server_alive(port: str) -> bool: """ Helper function that checks if a QLever server is running on the given @@ -82,30 +121,6 @@ def is_qlever_server_alive(port: str) -> bool: return exit_code == 0 -def get_curl_cmd_for_sparql_query( - query: str, port: int, - host: str = "localhost", - media_type: str = "application/sparql-results+qlever", - verbose: bool = False, - pinresult: bool = False, - access_token: Optional[str] = None, - send: Optional[int] = None) -> str: - """ - Get curl command for given SPARQL query. 
-    """
-    curl_cmd = (f"curl -s http::{host}:{port}"
-                f" -H \"Accept: {media_type}\" "
-                f" --data-urlencode query={shlex.quote(query)}")
-    if pinresult and access_token is not None:
-        curl_cmd += " --data-urlencode pinresult=true"
-        curl_cmd += f" --data-urlencode access_token={access_token}"
-    if send is not None:
-        curl_cmd += f" --data-urlencode send={send}"
-    if verbose:
-        curl_cmd += " --verbose"
-    return curl_cmd
-
-
 def get_existing_index_files(basename: str) -> list[str]:
     """
     Helper function that returns a list of all index files for `basename` in
@@ -137,8 +152,9 @@ def show_table_line(pid, user, start_time, rss, cmdline):
             pinfo = psutil_process.as_dict(
                 attrs=['pid', 'username', 'create_time', 'memory_info',
                        'cmdline'])
-            cmdline = " ".join(pinfo['cmdline'])
-            if not re.search(cmdline_regex, cmdline):
+            # Note: pinfo[`cmdline`] is `None` if the process is a zombie.
+            cmdline = " ".join(pinfo['cmdline'] or [])
+            if len(cmdline) == 0 or not re.search(cmdline_regex, cmdline):
                 return False
             pid = pinfo['pid']
             user = pinfo['username'] if pinfo['username'] else ""
@@ -162,6 +178,5 @@ def get_random_string(length: int) -> str:
     Helper function that returns a randomly chosen string of the given
-    length. Take the current time as seed.
+    length. Uses a cryptographically secure source of randomness.
     """
-    random.seed(datetime.now())
-    return "".join(random.choices(string.ascii_letters + string.digits,
-                                  k=length))
+    characters = string.ascii_letters + string.digits
+    return "".join(secrets.choice(characters) for _ in range(length))
diff --git a/test/qlever/test_util.py b/test/qlever/test_util.py
new file mode 100644
index 00000000..aae782dc
--- /dev/null
+++ b/test/qlever/test_util.py
@@ -0,0 +1,9 @@
+from qlever.util import get_random_string
+
+
+def test_get_random_string():
+    random_string_1 = get_random_string(20)
+    random_string_2 = get_random_string(20)
+    assert len(random_string_1) == 20
+    assert len(random_string_2) == 20
+    assert random_string_1 != random_string_2