Run integration tests in CI #4

Closed · wants to merge 4 commits
57 changes: 57 additions & 0 deletions .github/workflows/_integration_test.yml
@@ -0,0 +1,57 @@
name: integration-test

on:
  workflow_call:
    inputs:
      working-directory:
        required: true
        type: string
        description: "From which folder this pipeline executes"

env:
  POETRY_VERSION: "1.7.1"
  DOCKER_COMPOSE_YAML: "libs/elasticsearch/integration_tests/docker-compose.yml"

jobs:
  build:
    defaults:
      run:
        working-directory: ${{ inputs.working-directory }}
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
          - "3.11"
    name: "Integration tests"
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
        uses: "./.github/actions/poetry_setup"
        with:
          python-version: ${{ matrix.python-version }}
          poetry-version: ${{ env.POETRY_VERSION }}
          working-directory: ${{ inputs.working-directory }}
          cache-key: integration-test

      - name: Install dependencies
        shell: bash
        run: poetry install --with=test_integration,test

      - name: Start containers
        shell: bash
        run: docker-compose -f "$DOCKER_COMPOSE_YAML" up elasticsearch -d --build

      - name: Run integration tests
        shell: bash
        env:
          OPENAI_API_KEY: ${{ secrets.SuperSecret }}
        run: make integration_test

      - name: Stop containers
        if: always()
        shell: bash
        run: docker-compose -f "$DOCKER_COMPOSE_YAML" down elasticsearch
15 changes: 14 additions & 1 deletion .github/workflows/check_diffs.yml
@@ -69,7 +69,7 @@ jobs:
    uses: ./.github/workflows/_compile_integration_test.yml
    with:
      working-directory: ${{ matrix.working-directory }}
    secrets: inherit

  ci_success:
    name: "CI Success"
    needs: [build, lint, test, compile-integration-tests]
@@ -87,3 +87,16 @@ jobs:
          echo $RESULTS_JSON
          echo "Exiting with $EXIT_CODE"
          exit $EXIT_CODE
    secrets: inherit

  # integration-test:
  #   name: cd ${{ matrix.working-directory }}
  #   needs: [ compile-interation-tests ]
  #   if: ${{ needs.build.outputs.dirs-to-test != '[]' }}
  #   strategy:
  #     matrix:
  #       working-directory: ${{ fromJson(needs.build.outputs.dirs-to-test) }}
  #   uses: ./.github/workflows/_integration_test.yml
  #   with:
  #     working-directory: ${{ matrix.working-directory }}
  #   secrets: inherit
39 changes: 37 additions & 2 deletions libs/elasticsearch/tests/integration_tests/_test_utilities.py
@@ -1,8 +1,11 @@
import os
from typing import Any, Dict, List
import time
from typing import Any, Dict, List, Optional

from elastic_transport import Transport
from elasticsearch import Elasticsearch
from elasticsearch import Elasticsearch, NotFoundError

from langchain_elasticsearch._utilities import check_if_model_deployed


def clear_test_indices(es: Elasticsearch) -> None:
@@ -40,3 +43,35 @@ def perform_request(self, *args, **kwargs): # type: ignore
    es = Elasticsearch(hosts=[es_url], transport_class=CustomTransport)

    return es


def deploy_model(
    es_client: Elasticsearch,
    model_id: str = ".elser_model_2",
    field: str = "text_field",
    model_type: Optional[str] = None,
    inference_config: Optional[Dict] = None,
) -> None:
    try:
        check_if_model_deployed(es_client, model_id)
    except NotFoundError:
        # download the model
        es_client.ml.put_trained_model(
            model_id=model_id,
            input={"field_names": [field]},
            model_type=model_type,
            inference_config=inference_config,
        )

        # wait until ready
        while True:
            status = es_client.ml.get_trained_models(
                model_id=model_id, include="definition_status"
            )
            if status["trained_model_configs"][0]["fully_defined"]:
                # model is ready
                break
            time.sleep(1)

        # deploy the model
        es_client.ml.start_trained_model_deployment(model_id=model_id)
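
For reference, a minimal sketch (not part of the PR) of how a local run might use this helper before invoking the tests. It assumes the ES_URL default and the .elser_model_2 model id that appear elsewhere in this diff, and that it is executed from within the integration_tests package:

import os

from elasticsearch import Elasticsearch

from ._test_utilities import deploy_model

# Connect to the Elasticsearch container started via docker-compose and make
# sure the ELSER model is downloaded and deployed before the tests run.
es_client = Elasticsearch(hosts=[os.environ.get("ES_URL", "http://localhost:9200")])
deploy_model(es_client, model_id=".elser_model_2")
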
76 changes: 33 additions & 43 deletions libs/elasticsearch/tests/integration_tests/test_embeddings.py
@@ -1,48 +1,38 @@
"""Test elasticsearch_embeddings embeddings."""

import pytest
from langchain_core.utils import get_from_env
import os

from elasticsearch import Elasticsearch

from langchain_elasticsearch.embeddings import ElasticsearchEmbeddings

# deployed with
# https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-text-emb-vector-search-example.html
DEFAULT_MODEL = "sentence-transformers__msmarco-minilm-l-12-v3"
DEFAULT_NUM_DIMENSIONS = "384"


@pytest.fixture
def model_id() -> str:
    return get_from_env("model_id", "MODEL_ID", DEFAULT_MODEL)


@pytest.fixture
def expected_num_dimensions() -> int:
    return int(
        get_from_env(
            "expected_num_dimensions", "EXPECTED_NUM_DIMENSIONS", DEFAULT_NUM_DIMENSIONS
        )
    )


def test_elasticsearch_embedding_documents(
    model_id: str, expected_num_dimensions: int
) -> None:
    """Test Elasticsearch embedding documents."""
    documents = ["foo bar", "bar foo", "foo"]
    embedding = ElasticsearchEmbeddings.from_credentials(model_id)
    output = embedding.embed_documents(documents)
    assert len(output) == 3
    assert len(output[0]) == expected_num_dimensions
    assert len(output[1]) == expected_num_dimensions
    assert len(output[2]) == expected_num_dimensions


def test_elasticsearch_embedding_query(
    model_id: str, expected_num_dimensions: int
) -> None:
    """Test Elasticsearch embedding query."""
    document = "foo bar"
    embedding = ElasticsearchEmbeddings.from_credentials(model_id)
    output = embedding.embed_query(document)
    assert len(output) == expected_num_dimensions
from ._test_utilities import deploy_model

ES_CLIENT = Elasticsearch(hosts=[os.environ.get("ES_URL", "http://localhost:9200")])
MODEL_ID = ".elser_model_2"


class TestEmbeddings:
    @classmethod
    def setup_class(cls) -> None:
        deploy_model(ES_CLIENT, MODEL_ID)

    def test_elasticsearch_embedding_documents(self) -> None:
        """Test Elasticsearch embedding documents."""
        documents = ["foo bar", "bar foo", "foo"]
        embedding = ElasticsearchEmbeddings(ES_CLIENT.ml, MODEL_ID)
        output = embedding.embed_documents(documents)
        assert len(output) == 3
        assert "foo" in output[0]
        assert "##bar" in output[0]
        assert "bar" in output[1]
        assert "foo" in output[1]
        assert "foo" in output[2]

    def test_elasticsearch_embedding_query(self) -> None:
        """Test Elasticsearch embedding query."""
        document = "foo bar"
        embedding = ElasticsearchEmbeddings(ES_CLIENT.ml, MODEL_ID)
        output = embedding.embed_query(document)
        assert "foo" in output
        assert "##bar" in output
21 changes: 9 additions & 12 deletions libs/elasticsearch/tests/integration_tests/test_vectorstores.py
@@ -17,7 +17,7 @@
    ConsistentFakeEmbeddings,
    FakeEmbeddings,
)
from ._test_utilities import clear_test_indices, requests_saving_es_client
from ._test_utilities import clear_test_indices, deploy_model, requests_saving_es_client

logging.basicConfig(level=logging.DEBUG)

@@ -40,17 +40,11 @@
"""

modelsDeployed: List[str] = [
    # ".elser_model_1",
    # "sentence-transformers__all-minilm-l6-v2",
]


class TestElasticsearch:
    @classmethod
    def setup_class(cls) -> None:
        if not os.getenv("OPENAI_API_KEY"):
            raise ValueError("OPENAI_API_KEY environment variable is not set")

    @pytest.fixture(scope="class", autouse=True)
    def elasticsearch_connection(self) -> Union[dict, Generator[dict, None, None]]:
        es_url = os.environ.get("ES_URL", "http://localhost:9200")
@@ -708,20 +702,23 @@ def assert_query(query_body: dict, query: str) -> dict:
        output = docsearch.similarity_search("bar", k=1)
        assert output == [Document(page_content="bar")]

    @pytest.mark.skipif(
        ".elser_model_1" not in modelsDeployed,
        reason="ELSER not deployed in ML Node, skipping test",
    )
    def test_similarity_search_with_sparse_infer_instack(
        self, elasticsearch_connection: dict, index_name: str
    ) -> None:
        """test end to end with sparse retrieval strategy and inference in-stack"""
        model_id = ".elser_model_2"

        es_client = ElasticsearchStore.connect_to_elasticsearch(
            **elasticsearch_connection
        )
        deploy_model(es_client, model_id)

        texts = ["foo", "bar", "baz"]
        docsearch = ElasticsearchStore.from_texts(
            texts,
            **elasticsearch_connection,
            index_name=index_name,
            strategy=ElasticsearchStore.SparseVectorRetrievalStrategy(),
            strategy=ElasticsearchStore.SparseVectorRetrievalStrategy(model_id),
        )
        output = docsearch.similarity_search("foo", k=1)
        assert output == [Document(page_content="foo")]