Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add unit tests for the index command #83

Merged
merged 52 commits into from
Dec 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
2ab3932
Add files via upload
SimonL22 Aug 14, 2024
ac200b0
Update test_status_execute_mocken.py
SimonL22 Aug 14, 2024
1704173
Update test_status_execute_mocken.py
SimonL22 Aug 14, 2024
953358f
Update test_status_execute_mocken.py
SimonL22 Aug 14, 2024
96ddcac
Update test_status_execute_mocken.py
SimonL22 Aug 14, 2024
12537dd
Update test_status_execute_mocken.py
SimonL22 Aug 20, 2024
6a8046d
Create pytest.yml
SimonL22 Aug 20, 2024
90fc463
Merge branch 'ad-freiburg:main' into test_status_execute_mocken
SimonL22 Aug 21, 2024
6704850
Create test_stop_execute
SimonL22 Aug 21, 2024
b58d7dd
Update and rename test/qlever/test_status_execute_mocken.py to test/q…
SimonL22 Aug 21, 2024
d46e928
Update test_status_execute_mocken.py
SimonL22 Aug 21, 2024
a62da4d
Add files via upload
SimonL22 Aug 21, 2024
98811a8
Update test_status_other_methods.py
SimonL22 Aug 26, 2024
6fe6558
Update pytest.yml
SimonL22 Aug 27, 2024
b18c36f
Merge branch 'ad-freiburg:main' into test_status_execute_mocken
SimonL22 Aug 31, 2024
4bdbf92
Create test_index_execute_mocken.py
SimonL22 Sep 4, 2024
fbc950f
Create test_index_other_methods.py
SimonL22 Sep 4, 2024
2a11cef
Update test_index_other_methods.py
SimonL22 Sep 7, 2024
e50ef8b
Update pytest.yml
SimonL22 Sep 10, 2024
549132c
Update pytest.yml
SimonL22 Sep 10, 2024
c22f8db
Update pytest.yml
SimonL22 Sep 10, 2024
50468f2
Update pytest.yml
SimonL22 Sep 10, 2024
bbc593a
Update and rename test_status_execute_mocken.py to test_status_execut…
SimonL22 Sep 10, 2024
da508b4
Rename test_index_execute_mocken.py to test_index_execute.py
SimonL22 Sep 10, 2024
47ed435
Update test_status_execute.py
SimonL22 Sep 12, 2024
7a656ea
Update test_status_other_methods.py
SimonL22 Sep 12, 2024
833bad6
Update test_status_execute.py
SimonL22 Sep 12, 2024
f6c759d
Update test_status_other_methods.py
SimonL22 Sep 12, 2024
d3c020b
Delete test/qlever/commands/test_stop_execute
SimonL22 Sep 12, 2024
c1fd7b7
Update test_status_other_methods.py
SimonL22 Oct 9, 2024
fb7bf53
Update test_status_execute.py
SimonL22 Oct 9, 2024
80d0c48
Update test_status_execute.py
SimonL22 Oct 9, 2024
bce038f
Merge branch 'ad-freiburg:main' into test_index_with_mocking
SimonL22 Nov 3, 2024
00618dc
Merge branch 'ad-freiburg:main' into test_status_with_mocking
SimonL22 Nov 3, 2024
289d6b3
Update test_index_other_methods.py
SimonL22 Nov 3, 2024
458cf19
Update test_index_other_methods.py
SimonL22 Nov 3, 2024
c5680a3
Update test_index_execute.py
SimonL22 Nov 3, 2024
a2cbdde
Update test_index_other_methods.py
SimonL22 Nov 4, 2024
44d8cf4
Update test_index_other_methods.py
SimonL22 Nov 4, 2024
6ebe1d8
Update test_index_execute.py
SimonL22 Nov 7, 2024
5a45ecc
Merge branch 'ad-freiburg:main' into test_index_with_mocking
SimonL22 Dec 20, 2024
796da0d
Update index.py
SimonL22 Dec 20, 2024
bcd3eee
Update test_index_other_methods.py
SimonL22 Dec 23, 2024
b15f393
Update test_index_execute.py
SimonL22 Dec 23, 2024
e446092
Merge branch 'ad-freiburg:main' into test_status_with_mocking
SimonL22 Dec 23, 2024
6ef67ed
formatting
joka921 Dec 23, 2024
1cf6d16
fix tests
joka921 Dec 23, 2024
2fc801a
formatting fixed
joka921 Dec 23, 2024
5fb5ab0
also sort the imports via ruff
joka921 Dec 23, 2024
6e7671f
Merge branch 'test_status_with_mocking' into test_index_with_mocking
joka921 Dec 23, 2024
b56199b
formatting
joka921 Dec 23, 2024
3dd365d
Merge branch 'ad-freiburg:main' into test_index_with_mocking
SimonL22 Dec 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/qlever/commands/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def get_input_options_for_json(self, args) -> str:
raise self.InvalidInputJson(
f"Element {i} in `MULTI_INPUT_JSON` must only contain "
"the keys `format`, `graph`, and `parallel`. Contains "
"extra keys {extra_keys}.",
f"extra keys {extra_keys}.",
input_spec,
)
# Add the command-line options for this input stream. We use
Expand Down
368 changes: 368 additions & 0 deletions test/qlever/commands/test_index_execute.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,368 @@
from __future__ import annotations

import shlex
import unittest
from unittest.mock import MagicMock, call, patch

from qlever.commands.index import IndexCommand


class TestIndexCommand(unittest.TestCase):
    """Unit tests for `IndexCommand.execute`.

    Each test patches the names it needs inside `qlever.commands.index`
    and calls `execute` with a `MagicMock` that stands in for the parsed
    command-line arguments. The assertions then inspect the calls made to
    the patched objects and the boolean returned by `execute`.
    """

    @staticmethod
    def _make_args(**attrs) -> MagicMock:
        # Build the stand-in for the parsed arguments. The attributes are
        # assigned one by one because `MagicMock(name=...)` would name the
        # mock itself instead of creating a `name` attribute.
        args = MagicMock()
        for attr_name, value in attrs.items():
            setattr(args, attr_name, value)
        return args

    @classmethod
    def _native_args(cls, **overrides) -> MagicMock:
        # Arguments shared by the tests that index a single input stream
        # on the native system; `overrides` replaces or adds attributes.
        # NOTE: an attribute that is never assigned is auto-created by
        # `MagicMock` and is truthy, so tests that need a falsy value
        # (e.g. `parallel_parsing=False`) must pass it explicitly.
        attrs = {
            "name": "TestName",
            "format": "turtle",
            "cat_input_files": "cat input.nt",
            "index_binary": "/test/path/index-binary",
            "settings_json": '{"example": "settings"}',
            "input_files": "*.nt",
            "only_pso_and_pos_permutations": False,
            "use_patterns": True,
            "text_index": None,
            "stxxl_memory": None,
            "system": "native",
            "show": False,
            "overwrite_existing": False,
            "index_container": "test_container",
            "image": "test_image",
            "multi_input_json": False,
        }
        attrs.update(overrides)
        return cls._make_args(**attrs)

    # Test execute of index command for basic case with successful execution
    @patch("qlever.commands.index.run_command")
    @patch("qlever.commands.index.Containerize")
    @patch("qlever.commands.index.get_existing_index_files")
    @patch("qlever.commands.index.get_total_file_size")
    @patch("qlever.commands.index.glob")
    def test_execute_successful_indexing_without_extras(
        self,
        mock_glob,
        mock_get_total_file_size,
        mock_get_existing_index_files,
        mock_containerize,
        mock_run_command,
    ):
        # Setup args (no option that would extend the index command)
        args = self._native_args(
            parallel_parsing=False,
            text_index="Test Index",
            stxxl_memory=False,
        )

        # Mock glob, get_total_file_size, get_existing_index_files,
        # run_command and containerize
        mock_glob.glob.return_value = ["input1.nt", "input2.nt"]
        mock_get_total_file_size.return_value = 5e9  # 5 GB
        mock_get_existing_index_files.return_value = []
        mock_run_command.return_value = None
        mock_containerize.supported_systems.return_value = ["docker"]

        # Instantiate and execute the IndexCommand
        result = IndexCommand().execute(args)

        # Commands we expect to be handed to run_command
        binary_check_cmd = f"{args.index_binary} --help"
        settings_json_cmd = (
            f"echo {shlex.quote(args.settings_json)} "
            f"> {args.name}.settings.json"
        )
        index_cmd = (
            f"{args.cat_input_files} | {args.index_binary}"
            f" -i {args.name} -s {args.name}.settings.json"
            f" -F {args.format} -f - | tee"
            f" {args.name}.index-log.txt"
        )

        # The three commands must have been run in exactly this order.
        # (`assert_has_calls` checks the sequence, it does not bound the
        # total number of calls.)
        mock_run_command.assert_has_calls(
            [
                call(binary_check_cmd),
                call(settings_json_cmd),
                call(index_cmd, show_output=True),
            ],
            any_order=False,
        )
        self.assertTrue(result)

    # Test execute for file already existing
    @patch("qlever.commands.index.run_command")
    @patch("qlever.commands.index.Containerize")
    @patch("qlever.commands.index.get_existing_index_files")
    @patch("qlever.commands.index.get_total_file_size")
    @patch("qlever.commands.index.log")
    @patch("qlever.commands.index.glob")
    def test_execute_indexing_with_already_existing_files(
        self,
        mock_glob,
        mock_log,
        mock_get_total_file_size,
        mock_get_existing_index_files,
        mock_containerize,
        mock_run_command,
    ):
        # Setup args
        args = self._native_args()

        # Mock glob, get_total_file_size, get_existing_index_files,
        # run_command and containerize; an index file is reported as
        # already present
        existing_files = ["TestName.index"]
        mock_glob.glob.return_value = ["input1.nt", "input2.nt"]
        mock_get_total_file_size.return_value = 5e9  # 5 GB
        mock_get_existing_index_files.return_value = existing_files
        mock_run_command.return_value = None
        mock_containerize.supported_systems.return_value = []

        # Instantiate IndexCommand and execute the function
        result = IndexCommand().execute(args)

        # Assertions
        self.assertFalse(result)
        # Verify that the error message was logged
        mock_log.error.assert_called_once_with(
            f'Index files for basename "{args.name}" found, if you '
            f"want to overwrite them, use --overwrite-existing"
        )
        # Check that the info log lists the index files that were found
        mock_log.info.assert_any_call(f"Index files found: {existing_files}")
        # Checking if run_command was only called once
        # (not after detecting existing files)
        mock_run_command.assert_called_once_with(f"{args.index_binary} --help")

    # Test execute for no index binary found
    @patch("qlever.commands.index.run_command")
    @patch("qlever.commands.index.Containerize")
    @patch("qlever.commands.index.get_existing_index_files")
    @patch("qlever.commands.index.get_total_file_size")
    @patch("qlever.commands.index.log")
    @patch("qlever.commands.index.glob")
    def test_execute_fails_if_no_indexing_binary_is_found(
        self,
        mock_glob,
        mock_log,
        mock_get_total_file_size,
        mock_get_existing_index_files,
        mock_containerize,
        mock_run_command,
    ):
        # Setup args
        args = self._native_args(index_binary="/test/path/no-binary-found")

        # Mock glob, get_total_file_size, get_existing_index_files,
        # run_command and containerize
        # if run_command is called throw an Exception with "Binary not found"
        mock_glob.glob.return_value = ["input1.nt", "input2.nt"]
        mock_get_total_file_size.return_value = 5e9  # 5 GB
        mock_get_existing_index_files.return_value = []
        mock_run_command.side_effect = Exception("Binary not found")
        mock_containerize.supported_systems.return_value = []

        # Instantiate IndexCommand and execute the function
        result = IndexCommand().execute(args)

        # Assertions
        self.assertFalse(result)
        # Verify that the error message was logged
        mock_log.error.assert_called_once_with(
            f'Running "{args.index_binary}" failed, '
            f"set `--index-binary` to a different binary or "
            f"set `--system to a container system`"
        )
        # Check that the info log contains the exception message
        mock_log.info.assert_any_call(
            "The error message was: Binary not found"
        )
        # Checking if run_command was only called once
        # (not after throwing an Exception)
        mock_run_command.assert_called_once_with(f"{args.index_binary} --help")

    # Test execute for file size > 10gb
    @patch("qlever.commands.index.run_command")
    @patch("qlever.commands.index.Containerize")
    @patch("qlever.commands.index.get_existing_index_files")
    @patch("qlever.commands.index.get_total_file_size")
    @patch("qlever.commands.index.glob")
    def test_execute_total_file_size_greater_than_ten_gb(
        self,
        mock_glob,
        mock_get_total_file_size,
        mock_get_existing_index_files,
        mock_containerize,
        mock_run_command,
    ):
        # Setup args
        args = self._native_args(parallel_parsing=False)

        # Mock glob, get_total_file_size, get_existing_index_files,
        # run_command and containerize
        mock_glob.glob.return_value = ["input1.nt", "input2.nt"]
        mock_get_total_file_size.return_value = 15e9  # 15 GB
        mock_get_existing_index_files.return_value = []
        mock_run_command.return_value = None
        mock_containerize.supported_systems.return_value = []

        # Instantiate IndexCommand and execute the function
        result = IndexCommand().execute(args)

        # For the large input the index command is expected to be
        # prefixed with a `ulimit` call
        expected_index_cmd = (
            f"ulimit -Sn 1048576; {args.cat_input_files} | {args.index_binary}"
            f" -i {args.name} -s {args.name}.settings.json"
            f" -F {args.format} -f -"
            f" | tee {args.name}.index-log.txt"
        )
        mock_run_command.assert_any_call(expected_index_cmd, show_output=True)
        self.assertTrue(result)

    # Test elif branch for multi_input_json
    @patch("qlever.commands.index.log")
    @patch("qlever.commands.index.json")
    def test_execute_get_input_options_error(self, mock_json, mock_log):
        # Setup args
        args = self._make_args(
            cat_input_files=False,
            multi_input_json='{"cmd": "test_data"}',
        )

        # Simulate a JSON loading error
        mock_json.loads.side_effect = Exception("Wrong format")

        # Instantiate IndexCommand and execute the function
        result = IndexCommand().execute(args)

        error_msg = (
            "Failed to parse `MULTI_INPUT_JSON` as either JSON or "
            "JSONL (Wrong format)"
        )
        # Verify that the error message was logged
        mock_log.error.assert_called_once_with(error_msg)
        # The empty line and the offending input must have been logged
        # via log.info, in this order
        mock_log.info.assert_has_calls(
            [call(""), call(args.multi_input_json)], any_order=False
        )
        self.assertFalse(result)

    # Test else branch for multi_input_json
    @patch("qlever.commands.index.log")
    def test_execute_cat_files_and_multi_json(self, mock_log):
        # Setup args: both input options are set at the same time
        args = self._make_args(cat_input_files=True, multi_input_json=True)

        # Instantiate IndexCommand and execute the function
        result = IndexCommand().execute(args)

        error_msg = (
            "Specify exactly one of `CAT_INPUT_FILES` (for a "
            "single input stream) or `MULTI_INPUT_JSON` (for "
            "multiple input streams)"
        )
        log_msg = "See `qlever index --help` for more information"
        # Verify that the error message was logged
        mock_log.error.assert_called_once_with(error_msg)
        # The empty line and the hint must have been logged via log.info,
        # in this order
        mock_log.info.assert_has_calls(
            [call(""), call(log_msg)], any_order=False
        )
        self.assertFalse(result)

    # Tests all the extra additions to the index_cmd and the show option
    @patch("qlever.commands.index.IndexCommand.get_input_options_for_json")
    @patch("qlever.commands.index.IndexCommand.show")
    def test_execute_successful_indexing_with_extras_and_show(
        self, mock_show, mock_input_json
    ):
        # Setup args
        args = self._make_args(
            name="TestName",
            index_binary="/test/path/index-binary",
            multi_input_json=True,
            cat_input_files=False,
            only_pso_and_pos_permutations=True,
            use_patterns=False,
            text_index="from_text_records_and_literals",
            stxxl_memory=True,
            input_files="*.nt",
            system="native",
            settings_json='{"example": "settings"}',
            show=True,
        )

        # Mock get_input_options_for_json
        mock_input_json.return_value = "test_input_stream"

        # Instantiate and execute the IndexCommand
        result = IndexCommand().execute(args)

        # Assertions
        # NOTE(review): `--no-patterns` appears twice on purpose here;
        # presumably it is added once for the permutation option and once
        # for `use_patterns=False` - confirm against `index.py`.
        expected_index_cmd = (
            f"{args.index_binary}"
            f" -i {args.name} -s {args.name}.settings.json"
            f" {mock_input_json.return_value}"
            f" --only-pso-and-pos-permutations --no-patterns"
            f" --no-patterns -w {args.name}.wordsfile.tsv"
            f" -d {args.name}.docsfile.tsv"
            f" --text-words-from-literals"
            f" --stxxl-memory {args.stxxl_memory}"
            f" | tee {args.name}.index-log.txt"
        )
        settings_json_cmd = (
            f"echo {shlex.quote(args.settings_json)} "
            f"> {args.name}.settings.json"
        )

        # Verify that show was called with the right parameters
        mock_show.assert_called_once_with(
            f"{settings_json_cmd}\n{expected_index_cmd}",
            only_show=args.show,
        )
        self.assertTrue(result)
Loading
Loading