Use streamed token counts for Claude and GPT #77

Merged: 4 commits, merged on May 15, 2024
107 changes: 76 additions & 31 deletions gptcli/anthropic.py
@@ -1,12 +1,16 @@
 import os
-from typing import Iterator, List
+from typing import Iterator, List, Optional
 import anthropic

 from gptcli.completion import (
+    CompletionEvent,
     CompletionProvider,
     Message,
     CompletionError,
     BadRequestError,
+    MessageDeltaEvent,
+    Pricing,
+    UsageEvent,
 )

 api_key = os.environ.get("ANTHROPIC_API_KEY")
@@ -19,27 +23,10 @@ def get_client():
     return anthropic.Anthropic(api_key=api_key)


-def role_to_name(role: str) -> str:
-    if role == "system" or role == "user":
-        return anthropic.HUMAN_PROMPT
-    elif role == "assistant":
-        return anthropic.AI_PROMPT
-    else:
-        raise ValueError(f"Unknown role: {role}")
-
-
-def make_prompt(messages: List[Message]) -> str:
-    prompt = "\n".join(
-        [f"{role_to_name(message['role'])}{message['content']}" for message in messages]
-    )
-    prompt += f"{role_to_name('assistant')}"
-    return prompt
-
-
 class AnthropicCompletionProvider(CompletionProvider):
     def complete(
         self, messages: List[Message], args: dict, stream: bool = False
-    ) -> Iterator[str]:
+    ) -> Iterator[CompletionEvent]:
         kwargs = {
             "stop_sequences": [anthropic.HUMAN_PROMPT],
             "max_tokens": 4096,
@@ -58,26 +45,84 @@ def complete(
         kwargs["messages"] = messages

         client = get_client()
+        input_tokens = None
         try:
             if stream:
                 with client.messages.stream(**kwargs) as completion:
-                    for text in completion.text_stream:
-                        yield text
+                    for event in completion:
+                        if event.type == "content_block_delta":
+                            yield MessageDeltaEvent(event.delta.text)
+                        if event.type == "message_start":
+                            input_tokens = event.message.usage.input_tokens
+                        if (
+                            event.type == "message_delta"
+                            and (pricing := claude_pricing(args["model"]))
+                            and input_tokens
+                        ):
+                            yield UsageEvent.with_pricing(
+                                prompt_tokens=input_tokens,
+                                completion_tokens=event.usage.output_tokens,
+                                total_tokens=input_tokens + event.usage.output_tokens,
+                                pricing=pricing,
+                            )
+
             else:
                 response = client.messages.create(**kwargs, stream=False)
-                yield "".join(c.text for c in response.content)
+                yield MessageDeltaEvent("".join(c.text for c in response.content))
+                if pricing := claude_pricing(args["model"]):
+                    yield UsageEvent.with_pricing(
+                        prompt_tokens=response.usage.input_tokens,
+                        completion_tokens=response.usage.output_tokens,
+                        total_tokens=response.usage.input_tokens
+                        + response.usage.output_tokens,
+                        pricing=pricing,
+                    )
         except anthropic.BadRequestError as e:
             raise BadRequestError(e.message) from e
         except anthropic.APIError as e:
             raise CompletionError(e.message) from e


-def num_tokens_from_messages_anthropic(messages: List[Message], model: str) -> int:
-    prompt = make_prompt(messages)
-    client = get_client()
-    return client.count_tokens(prompt)
-
-
-def num_tokens_from_completion_anthropic(message: Message, model: str) -> int:
-    client = get_client()
-    return client.count_tokens(message["content"])
+CLAUDE_PRICE_PER_TOKEN: Pricing = {
+    "prompt": 11.02 / 1_000_000,
+    "response": 32.68 / 1_000_000,
+}
+
+CLAUDE_INSTANT_PRICE_PER_TOKEN: Pricing = {
+    "prompt": 1.63 / 1_000_000,
+    "response": 5.51 / 1_000_000,
+}
+
+CLAUDE_3_OPUS_PRICING: Pricing = {
+    "prompt": 15.0 / 1_000_000,
+    "response": 75.0 / 1_000_000,
+}
+
+CLAUDE_3_SONNET_PRICING: Pricing = {
+    "prompt": 3.0 / 1_000_000,
+    "response": 15.0 / 1_000_000,
+}
+
+CLAUDE_3_HAIKU_PRICING: Pricing = {
+    "prompt": 0.25 / 1_000_000,
+    "response": 1.25 / 1_000_000,
+}
+
+
+def claude_pricing(model: str) -> Optional[Pricing]:
+    if "instant" in model:
+        pricing = CLAUDE_INSTANT_PRICE_PER_TOKEN
+    elif "claude-3" in model:
+        if "opus" in model:
+            pricing = CLAUDE_3_OPUS_PRICING
+        elif "sonnet" in model:
+            pricing = CLAUDE_3_SONNET_PRICING
+        elif "haiku" in model:
+            pricing = CLAUDE_3_HAIKU_PRICING
+        else:
+            return None
+    elif "claude-2" in model:
+        pricing = CLAUDE_PRICE_PER_TOKEN
+    else:
+        return None
+    return pricing
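To try the new Anthropic event stream end to end, here is a minimal consumption sketch (not part of the diff; it assumes ANTHROPIC_API_KEY is exported, and the model name and Message fields are illustrative):

```python
# Minimal sketch: consume AnthropicCompletionProvider's new event stream.
# Assumes ANTHROPIC_API_KEY is set; the model name is illustrative.
from gptcli.anthropic import AnthropicCompletionProvider
from gptcli.completion import MessageDeltaEvent, UsageEvent

provider = AnthropicCompletionProvider()
events = provider.complete(
    [{"role": "user", "content": "Say hello"}],
    {"model": "claude-3-haiku-20240307"},
    stream=True,
)
for event in events:
    if isinstance(event, MessageDeltaEvent):
        print(event.text, end="", flush=True)  # streamed text deltas
    elif isinstance(event, UsageEvent):
        # one usage event at the end, built from the API's own token counts
        print(f"\n[{event.total_tokens} tokens, ${event.cost:.6f}]")
```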
9 changes: 7 additions & 2 deletions gptcli/assistant.py
@@ -4,7 +4,12 @@
 import platform
 from typing import Any, Dict, Iterator, Optional, TypedDict, List

-from gptcli.completion import CompletionProvider, ModelOverrides, Message
+from gptcli.completion import (
+    CompletionEvent,
+    CompletionProvider,
+    ModelOverrides,
+    Message,
+)
 from gptcli.google import GoogleCompletionProvider
 from gptcli.llama import LLaMACompletionProvider
 from gptcli.openai import OpenAICompletionProvider
@@ -106,7 +111,7 @@ def _param(self, param: str, override_params: ModelOverrides) -> Any:

     def complete_chat(
         self, messages, override_params: ModelOverrides = {}, stream: bool = True
-    ) -> Iterator[str]:
+    ) -> Iterator[CompletionEvent]:
         model = self._param("model", override_params)
         completion_provider = get_completion_provider(model)
         return completion_provider.complete(
41 changes: 39 additions & 2 deletions gptcli/completion.py
@@ -1,5 +1,7 @@
 from abc import abstractmethod
-from typing import Iterator, List, TypedDict
+from typing import Iterator, List, Literal, TypedDict, Union
+
+from attr import dataclass


 class Message(TypedDict):
@@ -13,11 +15,46 @@ class ModelOverrides(TypedDict, total=False):
     top_p: float


+class Pricing(TypedDict):
+    prompt: float
+    response: float
+
+
+@dataclass
+class MessageDeltaEvent:
+    text: str
+    type: Literal["message_delta"] = "message_delta"
+
+
+@dataclass
+class UsageEvent:
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+    cost: float
+    type: Literal["usage"] = "usage"
+
+    @staticmethod
+    def with_pricing(
+        prompt_tokens: int, completion_tokens: int, total_tokens: int, pricing: Pricing
+    ) -> "UsageEvent":
+        return UsageEvent(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=total_tokens,
+            cost=prompt_tokens * pricing["prompt"]
+            + completion_tokens * pricing["response"],
+        )
+
+
+CompletionEvent = Union[MessageDeltaEvent, UsageEvent]
+
+
 class CompletionProvider:
     @abstractmethod
     def complete(
         self, messages: List[Message], args: dict, stream: bool = False
-    ) -> Iterator[str]:
+    ) -> Iterator[CompletionEvent]:
         pass
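Since the cost math in with_pricing is the crux of the PR, a quick worked example (token counts invented; the prices mirror CLAUDE_3_SONNET_PRICING from gptcli/anthropic.py above):

```python
# Worked example of UsageEvent.with_pricing; token counts are invented,
# prices mirror CLAUDE_3_SONNET_PRICING ($3/M prompt, $15/M response).
from gptcli.completion import Pricing, UsageEvent

pricing: Pricing = {"prompt": 3.0 / 1_000_000, "response": 15.0 / 1_000_000}
usage = UsageEvent.with_pricing(
    prompt_tokens=1_200,
    completion_tokens=300,
    total_tokens=1_500,
    pricing=pricing,
)
# cost = 1_200 * $3/1M + 300 * $15/1M = $0.0036 + $0.0045 = $0.0081
assert abs(usage.cost - 0.0081) < 1e-12
```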
12 changes: 8 additions & 4 deletions gptcli/composite.py
@@ -1,8 +1,8 @@
-from gptcli.completion import Message, ModelOverrides
+from gptcli.completion import Message, ModelOverrides, UsageEvent
 from gptcli.session import ChatListener, ResponseStreamer


-from typing import List
+from typing import List, Optional


 class CompositeResponseStreamer(ResponseStreamer):
@@ -53,7 +53,11 @@ def on_chat_message(self, message: Message):
             listener.on_chat_message(message)

     def on_chat_response(
-        self, messages: List[Message], response: Message, overrides: ModelOverrides
+        self,
+        messages: List[Message],
+        response: Message,
+        overrides: ModelOverrides,
+        usage: Optional[UsageEvent],
     ):
         for listener in self.listeners:
-            listener.on_chat_response(messages, response, overrides)
+            listener.on_chat_response(messages, response, overrides, usage)
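Every ChatListener now receives the usage argument; here is a hypothetical listener sketching the widened hook (the class name and body are invented, only the signature comes from this PR):

```python
# Hypothetical listener illustrating the widened on_chat_response hook;
# the class name and print body are invented for illustration.
from typing import List, Optional

from gptcli.completion import Message, ModelOverrides, UsageEvent
from gptcli.session import ChatListener


class CostLoggingListener(ChatListener):
    def on_chat_response(
        self,
        messages: List[Message],
        response: Message,
        overrides: ModelOverrides,
        usage: Optional[UsageEvent],
    ):
        if usage is not None:
            print(f"cost: ${usage.cost:.6f} ({usage.total_tokens} tokens)")
```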