OctoAI support #1691

Closed
wants to merge 4 commits into from
54 changes: 54 additions & 0 deletions embedchain/docs/components/llms.mdx
@@ -25,6 +25,7 @@ Embedchain comes with built-in support for various popular large language models
<Card title="AWS Bedrock" href="#aws-bedrock"></Card>
<Card title="Groq" href="#groq"></Card>
<Card title="NVIDIA AI" href="#nvidia-ai"></Card>
<Card title="Octo AI" href="#octo-ai"></Card>
</CardGroup>

## OpenAI
@@ -840,6 +841,59 @@ answer = app.query("What is the net worth of Elon Musk today?")
```
</CodeGroup>


## Octo AI

[Octo AI](https://octo.ai/) hosts a wide range of models from Meta, Microsoft, Mistral, and others. These models are available in [Octo AI TextGen](https://octoai.cloud/text) and are ready to use in production.


### Usage

To use LLMs from Octo AI, sign up on [Octo AI](https://octo.ai/).

Generate a token from your account settings and set it as the `OCTOAI_API_TOKEN` environment variable.

Below is an example of how to use an LLM from Octo AI together with an embedding model from Hugging Face:

<CodeGroup>

```python main.py
import os
from embedchain import App

os.environ["OCTOAI_API_TOKEN"] = "enter_token"

config = {
    "llm": {
        "provider": "octoai",
        "config": {
            "model": "llama-2-13b-chat-fp16",
            "max_tokens": 200,
            "temperature": 0.1,
            "top_p": 0.9,
        }
    },
    "embedder": {
        "provider": "huggingface",
        "config": {
            "model": "nomic-ai/nomic-embed-text-v1",
            "model_kwargs": {
                "trust_remote_code": True,
            }
        }
    }
}

app = App.from_config(config=config)

app.add("https://www.forbes.com/profile/elon-musk")
answer = app.query("What is the net worth of Elon Musk today?")
# Answer: Elon Musk's net worth is $222.6 billion as of August 12, 2024. This reflects a change since the previous trading day. He remains the richest person in the world today.
```
</CodeGroup>



## Token Usage

You can get the cost of the query by setting `token_usage` to `True` in the config file. This will return the token details: `prompt_tokens`, `completion_tokens`, `total_tokens`, `total_cost`, `cost_currency`.
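For illustration, a hedged sketch of enabling it (assumes an OpenAI key, and that the query returns the usage details alongside the answer when `token_usage` is enabled; the exact return shape may vary by embedchain version):

```python
import os

from embedchain import App

os.environ["OPENAI_API_KEY"] = "enter_key"  # placeholder

config = {
    "llm": {
        "provider": "openai",
        "config": {
            "model": "gpt-3.5-turbo",
            "token_usage": True,  # report token counts and cost per query
        },
    },
}

app = App.from_config(config=config)
answer, token_info = app.query("What is the net worth of Elon Musk today?")
print(token_info)  # e.g. prompt_tokens, completion_tokens, total_tokens, total_cost, cost_currency
```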
1 change: 1 addition & 0 deletions embedchain/embedchain/factory.py
@@ -27,6 +27,7 @@ class LlmFactory:
"groq": "embedchain.llm.groq.GroqLlm",
"nvidia": "embedchain.llm.nvidia.NvidiaLlm",
"vllm": "embedchain.llm.vllm.VLLM",
"octoai": "embedchain.llm.octoai.OctoAILlm",
}
provider_to_config_class = {
"embedchain": "embedchain.config.llm.base.BaseLlmConfig",
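For context, `LlmFactory` maps each provider key to a dotted class path and loads it dynamically. A minimal sketch of that lookup pattern (an illustrative helper, not the factory's actual code):

```python
import importlib


def load_class(dotted_path: str):
    # "embedchain.llm.octoai.OctoAILlm" -> module "embedchain.llm.octoai", class "OctoAILlm"
    module_path, class_name = dotted_path.rsplit(".", 1)
    return getattr(importlib.import_module(module_path), class_name)


octoai_cls = load_class("embedchain.llm.octoai.OctoAILlm")
```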
40 changes: 40 additions & 0 deletions embedchain/embedchain/llm/octoai.py
@@ -0,0 +1,40 @@
import os
from typing import Optional

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms.octoai_endpoint import OctoAIEndpoint

from embedchain.config import BaseLlmConfig
from embedchain.helpers.json_serializable import register_deserializable
from embedchain.llm.base import BaseLlm


@register_deserializable
class OctoAILlm(BaseLlm):
    def __init__(self, config: Optional[BaseLlmConfig] = None):
        # Guard against `config` being None so a missing token raises AssertionError, not AttributeError.
        assert "OCTOAI_API_TOKEN" in os.environ or (config is not None and config.api_key), \
            "Please set OCTOAI_API_TOKEN as environment variable."
Member
Appreciate adding the assert statement. But can you please also add this:

api_key = os.getenv("OCTOAI_API_TOKEN") or self.config.api_key

        super().__init__(config=config)

    def get_llm_model_answer(self, prompt):
        return self._get_answer(prompt, self.config)

    @staticmethod
    def _get_answer(prompt: str, config: BaseLlmConfig) -> str:
        octoai_api_key = os.getenv("OCTOAI_API_TOKEN") or config.api_key
        # Stream to stdout by default when streaming is enabled and no callbacks were supplied.
        callbacks = config.callbacks if (not config.stream) or (config.stream and config.callbacks) \
            else [StreamingStdOutCallbackHandler()]

        chat = OctoAIEndpoint(
            octoai_api_token=octoai_api_key,
            model_name=config.model,
            max_tokens=config.max_tokens,
            temperature=config.temperature,
            top_p=config.top_p,
            streaming=config.stream,
            callbacks=callbacks,
        )

        return chat.invoke(prompt)
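A hedged usage sketch for the class above, calling it directly instead of through `App` (assumes a valid token and that the model name is available on your account; the token value below is a placeholder):

```python
import os

from embedchain.config import BaseLlmConfig
from embedchain.llm.octoai import OctoAILlm

os.environ["OCTOAI_API_TOKEN"] = "enter_token"  # placeholder

config = BaseLlmConfig(
    model="llama-2-13b-chat-fp16",
    max_tokens=200,
    temperature=0.1,
    top_p=0.9,
)
llm = OctoAILlm(config=config)
print(llm.get_llm_model_answer("What is OctoAI?"))
```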
1 change: 1 addition & 0 deletions embedchain/embedchain/utils/misc.py
@@ -418,6 +418,7 @@ def validate_config(config_data):
"vllm",
"groq",
"nvidia",
"octoai",
),
Optional("config"): {
Optional("model"): str,
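For reference, `validate_config` builds on the `schema` package, so an unregistered provider name fails validation. A standalone sketch of the idea (a simplified stand-in, not embedchain's full schema):

```python
from schema import Optional, Schema, SchemaError

llm_schema = Schema({
    "provider": lambda p: p in ("vllm", "groq", "nvidia", "octoai"),
    Optional("config"): {Optional("model"): str},
})

llm_schema.validate({"provider": "octoai", "config": {"model": "llama-2-13b-chat-fp16"}})  # accepted
try:
    llm_schema.validate({"provider": "not-registered"})
except SchemaError:
    print("rejected: unknown provider")
```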
36 changes: 36 additions & 0 deletions embedchain/tests/llm/test_octoai.py
@@ -0,0 +1,36 @@
import os

import pytest

from embedchain.config import BaseLlmConfig
from embedchain.llm.octoai import OctoAILlm


@pytest.fixture
def octoai_env():
    os.environ["OCTOAI_API_TOKEN"] = "test_api_token"
    yield
    del os.environ["OCTOAI_API_TOKEN"]


@pytest.fixture
def octoai_llm_config():
    config = BaseLlmConfig(
        temperature=0.7,
        model="llama-2-13b-chat-fp16",
        max_tokens=50,
        top_p=0.9,
    )
    return config


def test_get_answer(octoai_llm_config, octoai_env, mocker):
    mocked_get_answer = mocker.patch("embedchain.llm.octoai.OctoAILlm._get_answer", return_value="Test answer")

    octoai_llm = OctoAILlm(octoai_llm_config)
    answer = octoai_llm.get_llm_model_answer("Test query")

    assert answer == "Test answer"
    mocked_get_answer.assert_called_once()


def test_octo_env_variable(octoai_llm_config):
    with pytest.raises(AssertionError):
        _ = OctoAILlm(octoai_llm_config)
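The tests rely on the `mocker` fixture from `pytest-mock`; a hedged way to run just this module from Python, assuming both pytest and pytest-mock are installed:

```python
import pytest

# Run only the new OctoAI tests with verbose output.
pytest.main(["embedchain/tests/llm/test_octoai.py", "-v"])
```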