OctoAI support #1691

Closed
wants to merge 4 commits into from
54 changes: 54 additions & 0 deletions embedchain/docs/components/llms.mdx
@@ -25,6 +25,7 @@ Embedchain comes with built-in support for various popular large language models
<Card title="AWS Bedrock" href="#aws-bedrock"></Card>
<Card title="Groq" href="#groq"></Card>
<Card title="NVIDIA AI" href="#nvidia-ai"></Card>
<Card title="Octo AI" href="#octo-ai"></Card>
</CardGroup>

## OpenAI
@@ -840,6 +841,59 @@ answer = app.query("What is the net worth of Elon Musk today?")
```
</CodeGroup>


## Octo AI

[Octo AI](https://octo.ai/) hosts a wide range of models from Meta, Microsoft, Mistral, and others. These models are available in [Octo AI TextGen](https://octoai.cloud/text) and are ready to use in production.


### Usage

To use LLMs from Octo AI, sign up on [Octo AI](https://octo.ai/).

Generate a token from your account settings and set it as the `OCTOAI_API_TOKEN` environment variable.

Below is an example of how to use an LLM from Octo AI together with an embedding model from Hugging Face:

<CodeGroup>

```python main.py
import os
from embedchain import App

os.environ["OCTOAI_API_TOKEN"] = "enter_token"

config = {
    "llm": {
        "provider": "octoai",
        "config": {
            "model": "llama-2-13b-chat-fp16",
            "max_tokens": 200,
            "temperature": 0.1,
            "top_p": 0.9,
        }
    },
    "embedder": {
        "provider": "huggingface",
        "config": {
            "model": "nomic-ai/nomic-embed-text-v1",
            "model_kwargs": {
                "trust_remote_code": True,
            }
        }
    }
}

app = App.from_config(config=config)

app.add("https://www.forbes.com/profile/elon-musk")
answer = app.query("What is the net worth of Elon Musk today?")
# Answer: Elon Musk's net worth is $222.6 billion as of August 12, 2024. This reflects a change since the previous trading day. He remains the richest person in the world today.
```
</CodeGroup>



## Token Usage

You can get the cost of the query by setting `token_usage` to `True` in the config file. This will return the token details: `prompt_tokens`, `completion_tokens`, `total_tokens`, `total_cost`, `cost_currency`.
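For illustration, a hedged sketch of enabling it (assumes an OpenAI key, and that the query returns the usage details alongside the answer when `token_usage` is enabled; the exact return shape may vary by embedchain version):

```python
import os

from embedchain import App

os.environ["OPENAI_API_KEY"] = "enter_key"  # placeholder

config = {
    "llm": {
        "provider": "openai",
        "config": {
            "model": "gpt-3.5-turbo",
            "token_usage": True,  # report token counts and cost per query
        },
    },
}

app = App.from_config(config=config)
answer, token_info = app.query("What is the net worth of Elon Musk today?")
print(token_info)  # e.g. prompt_tokens, completion_tokens, total_tokens, total_cost, cost_currency
```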
1 change: 1 addition & 0 deletions embedchain/embedchain/factory.py
@@ -27,6 +27,7 @@ class LlmFactory:
"groq": "embedchain.llm.groq.GroqLlm",
"nvidia": "embedchain.llm.nvidia.NvidiaLlm",
"vllm": "embedchain.llm.vllm.VLLM",
"octoai": "embedchain.llm.octoai.OctoAILlm",
}
provider_to_config_class = {
"embedchain": "embedchain.config.llm.base.BaseLlmConfig",
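For context, `LlmFactory` maps each provider key to a dotted class path and loads it dynamically. A minimal sketch of that lookup pattern (an illustrative helper, not the factory's actual code):

```python
import importlib


def load_class(dotted_path: str):
    # "embedchain.llm.octoai.OctoAILlm" -> module "embedchain.llm.octoai", class "OctoAILlm"
    module_path, class_name = dotted_path.rsplit(".", 1)
    return getattr(importlib.import_module(module_path), class_name)


octoai_cls = load_class("embedchain.llm.octoai.OctoAILlm")
```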
40 changes: 40 additions & 0 deletions embedchain/embedchain/llm/octoai.py
@@ -0,0 +1,40 @@
import os
from typing import Optional

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms.octoai_endpoint import OctoAIEndpoint

from embedchain.config import BaseLlmConfig
from embedchain.helpers.json_serializable import register_deserializable
from embedchain.llm.base import BaseLlm


@register_deserializable
class OctoAILlm(BaseLlm):
    def __init__(self, config: Optional[BaseLlmConfig] = None):
        # Guard against `config` being None so a missing token raises AssertionError, not AttributeError.
        assert "OCTOAI_API_TOKEN" in os.environ or (config is not None and config.api_key), \
            "Please set OCTOAI_API_TOKEN as environment variable."
Member
Appreciate adding the assert statement. But can you please also add this:

api_key = os.getenv("OCTOAI_API_TOKEN") or self.config.api_key

        super().__init__(config=config)

    def get_llm_model_answer(self, prompt):
        return self._get_answer(prompt, self.config)

    @staticmethod
    def _get_answer(prompt: str, config: BaseLlmConfig) -> str:
        octoai_api_key = os.getenv("OCTOAI_API_TOKEN") or config.api_key
        # Stream to stdout by default when streaming is enabled and no callbacks were supplied.
        callbacks = config.callbacks if (not config.stream) or (config.stream and config.callbacks) \
            else [StreamingStdOutCallbackHandler()]

        chat = OctoAIEndpoint(
            octoai_api_token=octoai_api_key,
            model_name=config.model,
            max_tokens=config.max_tokens,
            temperature=config.temperature,
            top_p=config.top_p,
            streaming=config.stream,
            callbacks=callbacks,
        )

        return chat.invoke(prompt)
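A hedged usage sketch for the class above, calling it directly instead of through `App` (assumes a valid token and that the model name is available on your account; the token value below is a placeholder):

```python
import os

from embedchain.config import BaseLlmConfig
from embedchain.llm.octoai import OctoAILlm

os.environ["OCTOAI_API_TOKEN"] = "enter_token"  # placeholder

config = BaseLlmConfig(
    model="llama-2-13b-chat-fp16",
    max_tokens=200,
    temperature=0.1,
    top_p=0.9,
)
llm = OctoAILlm(config=config)
print(llm.get_llm_model_answer("What is OctoAI?"))
```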
1 change: 1 addition & 0 deletions embedchain/embedchain/utils/misc.py
@@ -418,6 +418,7 @@ def validate_config(config_data):
"vllm",
"groq",
"nvidia",
"octoai",
),
Optional("config"): {
Optional("model"): str,
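For reference, `validate_config` builds on the `schema` package, so an unregistered provider name fails validation. A standalone sketch of the idea (a simplified stand-in, not embedchain's full schema):

```python
from schema import Optional, Schema, SchemaError

llm_schema = Schema({
    "provider": lambda p: p in ("vllm", "groq", "nvidia", "octoai"),
    Optional("config"): {Optional("model"): str},
})

llm_schema.validate({"provider": "octoai", "config": {"model": "llama-2-13b-chat-fp16"}})  # accepted
try:
    llm_schema.validate({"provider": "not-registered"})
except SchemaError:
    print("rejected: unknown provider")
```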
36 changes: 36 additions & 0 deletions embedchain/tests/llm/test_octoai.py
@@ -0,0 +1,36 @@
import os

import pytest

from embedchain.config import BaseLlmConfig
from embedchain.llm.octoai import OctoAILlm


@pytest.fixture
def octoai_env():
    os.environ["OCTOAI_API_TOKEN"] = "test_api_token"
    yield
    del os.environ["OCTOAI_API_TOKEN"]


@pytest.fixture
def octoai_llm_config():
    config = BaseLlmConfig(
        temperature=0.7,
        model="llama-2-13b-chat-fp16",
        max_tokens=50,
        top_p=0.9,
    )
    return config


def test_get_answer(octoai_llm_config, octoai_env, mocker):
    mocked_get_answer = mocker.patch("embedchain.llm.octoai.OctoAILlm._get_answer", return_value="Test answer")

    octoai_llm = OctoAILlm(octoai_llm_config)
    answer = octoai_llm.get_llm_model_answer("Test query")

    assert answer == "Test answer"
    mocked_get_answer.assert_called_once()


def test_octo_env_variable(octoai_llm_config):
    with pytest.raises(AssertionError):
        _ = OctoAILlm(octoai_llm_config)
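The tests rely on the `mocker` fixture from `pytest-mock`; a hedged way to run just this module from Python, assuming both pytest and pytest-mock are installed:

```python
import pytest

# Run only the new OctoAI tests with verbose output.
pytest.main(["embedchain/tests/llm/test_octoai.py", "-v"])
```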