Skip to content

Commit

Permalink
feat: FastEmbed embeddings
Browse files Browse the repository at this point in the history
  • Loading branch information
Anush008 committed Jul 25, 2024
1 parent fd1d5e0 commit e58a384
Show file tree
Hide file tree
Showing 8 changed files with 100 additions and 33 deletions.
41 changes: 41 additions & 0 deletions embedchain/embedchain/embedder/fastembed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from typing import List, Optional, Sequence, Union

# FastEmbed is an optional dependency: fail at import time with an actionable
# message. ImportError (chained to the original failure) is the conventional
# signal for a missing package and keeps the underlying cause in the traceback.
try:
    from fastembed import TextEmbedding
except ImportError as e:
    raise ImportError(
        "The 'fastembed' package is not installed. Please install it with `pip install fastembed`"
    ) from e

from embedchain.config import BaseEmbedderConfig
from embedchain.embedder.base import BaseEmbedder
from embedchain.models import VectorDimensions

Embedding = Sequence[float]
Embeddings = List[Embedding]


class FastEmbedEmbedder(BaseEmbedder):
    """Embedder whose embedding function is backed by a FastEmbed model."""

    def __init__(self, config: Optional[BaseEmbedderConfig] = None):
        """
        Wire a FastEmbed embedding function and a vector dimension into the embedder.

        :param config: Embedder configuration forwarded to ``BaseEmbedder.__init__``.
            When ``config.model`` is falsy, "BAAI/bge-small-en-v1.5" is used.
        """
        super().__init__(config)

        # Fill in the default model name only when the config did not provide one.
        if not self.config.model:
            self.config.model = "BAAI/bge-small-en-v1.5"

        self.set_embedding_fn(embedding_fn=FastEmbedEmbeddingFunction(config=self.config))

        # NOTE(review): the fallback is a single fixed enum value regardless of the
        # selected model - confirm callers set `vector_dimension` for models whose
        # output size differs.
        dimension = self.config.vector_dimension
        if not dimension:
            dimension = VectorDimensions.FASTEMBED.value
        self.set_vector_dimension(vector_dimension=dimension)


class FastEmbedEmbeddingFunction:
    """
    Generate embeddings using FastEmbed - https://qdrant.github.io/fastembed/.
    Find the list of supported models at https://qdrant.github.io/fastembed/examples/Supported_Models/.
    """

    def __init__(self, config: BaseEmbedderConfig) -> None:
        """
        Build the underlying ``TextEmbedding`` model.

        :param config: Embedder configuration; ``config.model`` is passed as ``model_name``
            and ``config.model_kwargs`` is expanded into extra keyword arguments.
        """
        self.config = config
        # Unpacking ``**None`` raises TypeError, so treat an unset model_kwargs as empty.
        extra_kwargs = self.config.model_kwargs or {}
        self._model = TextEmbedding(model_name=self.config.model, **extra_kwargs)

    def __call__(self, input: Union[List[str], str]) -> Embeddings:
        """
        Embed the given text(s).

        :param input: A single string or a list of strings, handed to the model's ``embed``.
        :return: One list per embedding produced by the model (each converted via ``tolist()``).
        """
        return [vector.tolist() for vector in self._model.embed(input)]
1 change: 1 addition & 0 deletions embedchain/embedchain/models/vector_dimensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ class VectorDimensions(Enum):
NVIDIA_AI = 1024
COHERE = 384
OLLAMA = 384
FASTEMBED = 384
40 changes: 10 additions & 30 deletions embedchain/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions embedchain/tests/embedder/test_fastembed_embedder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

from unittest.mock import patch

from embedchain.config import BaseEmbedderConfig
from embedchain.embedder.fastembed import FastEmbedEmbedder


def test_fastembed_embedder_with_model(monkeypatch):
    """The embedder keeps the configured model/kwargs and passes them to TextEmbedding."""
    expected_model = "intfloat/multilingual-e5-large"
    expected_kwargs = {"threads": 5}
    embedder_config = BaseEmbedderConfig(model=expected_model, model_kwargs=expected_kwargs)

    # Patch the name as it is looked up inside the embedder module, so the
    # constructor call is recorded on a mock instead of reaching FastEmbed.
    with patch('embedchain.embedder.fastembed.TextEmbedding') as text_embedding_cls:
        embedder = FastEmbedEmbedder(config=embedder_config)

        assert embedder.config.model == expected_model
        assert embedder.config.model_kwargs == expected_kwargs
        text_embedding_cls.assert_called_once_with(model_name=expected_model, threads=5)
2 changes: 1 addition & 1 deletion mem0/embeddings/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

class EmbedderConfig(BaseModel):
provider: str = Field(
description="Provider of the embedding model (e.g., 'ollama', 'openai')",
description="Provider of the embedding model (e.g., 'ollama', 'openai', 'fastembed')",
default="openai",
)
config: Optional[dict] = Field(
Expand Down
26 changes: 26 additions & 0 deletions mem0/embeddings/fastembed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# The embedder base class lives in the same package as the other providers
# (registered as ``mem0.embeddings.<provider>``), so import it by its full path.
from mem0.embeddings.base import EmbeddingBase

# FastEmbed is an optional dependency: fail with an actionable message if missing.
try:
    from fastembed import TextEmbedding
except ImportError as e:
    raise ImportError(
        "The 'fastembed' package is not installed. Please install it with `pip install fastembed`"
    ) from e


class FastEmbedEmbedding(EmbeddingBase):
    """
    Generate vector embeddings using FastEmbed - https://qdrant.github.io/fastembed/.
    Find the list of supported models at https://qdrant.github.io/fastembed/examples/Supported_Models/.
    """

    def __init__(
        self,
        model: str = "BAAI/bge-small-en-v1.5",
        dims: int = 384,
    ) -> None:
        """
        Load the FastEmbed model.

        :param model: Model name passed to ``TextEmbedding`` as ``model_name``.
        :param dims: Vector size stored on ``self.dims``. Defaults to 384, the value that
            was previously hard-coded; pass the actual output size when choosing a model
            whose vectors have a different length.
        """
        self.model = model
        self.dims = dims
        self._model = TextEmbedding(model_name=model)

    def embed(self, text):
        """
        Embed ``text`` and return the vector as a plain list.

        Only the first item yielded by the model's ``embed`` call is returned.
        """
        return next(self._model.embed(text)).tolist()
1 change: 0 additions & 1 deletion mem0/memory/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import json
import logging
import os
import time
Expand Down
3 changes: 2 additions & 1 deletion mem0/utils/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ class EmbedderFactory:
provider_to_class = {
"openai": "mem0.embeddings.openai.OpenAIEmbedding",
"ollama": "mem0.embeddings.ollama.OllamaEmbedding",
"huggingface": "mem0.embeddings.huggingface.HuggingFaceEmbedding"
"huggingface": "mem0.embeddings.huggingface.HuggingFaceEmbedding",
"fastembed": "mem0.embeddings.fastembed.FastEmbedEmbedding",
}

@classmethod
Expand Down

0 comments on commit e58a384

Please sign in to comment.