diff --git a/packages/tasks/src/tasks/chat-completion/inference.ts b/packages/tasks/src/tasks/chat-completion/inference.ts index 488a1e87e..df310d279 100644 --- a/packages/tasks/src/tasks/chat-completion/inference.ts +++ b/packages/tasks/src/tasks/chat-completion/inference.ts @@ -49,7 +49,7 @@ export interface ChatCompletionInput { * [UNUSED] ID of the model to use. See the model endpoint compatibility table for details * on which models work with the Chat API. */ - model: string; + model?: string; /** * UNUSED * How many chat completion choices to generate for each input message. Note that you will @@ -63,6 +63,7 @@ export interface ChatCompletionInput { * increasing the model's likelihood to talk about new topics */ presence_penalty?: number; + response_format?: ChatCompletionInputGrammarType; seed?: number; /** * Up to 4 sequences where the API will stop generating further tokens. @@ -77,7 +78,7 @@ export interface ChatCompletionInput { * We generally recommend altering this or `top_p` but not both. */ temperature?: number; - tool_choice?: ChatCompletionInputToolType; + tool_choice?: ChatCompletionInputTool; /** * A prompt to be appended before the tools */ @@ -87,7 +88,7 @@ export interface ChatCompletionInput { * Use this to provide a list of * functions the model may generate JSON inputs for. 
*/ - tools?: ChatCompletionInputTool[]; + tools?: ToolElement[]; /** * An integer between 0 and 5 specifying the number of most likely tokens to return at each * token position, each with @@ -105,40 +106,67 @@ export interface ChatCompletionInput { } export interface ChatCompletionInputMessage { - content?: string; + content: ChatCompletionInputMessageContent; name?: string; role: string; - tool_calls?: ChatCompletionInputToolCall[]; [property: string]: unknown; } -export interface ChatCompletionInputToolCall { - function: ChatCompletionInputFunctionDefinition; - id: number; - type: string; +export type ChatCompletionInputMessageContent = ChatCompletionInputMessageChunk[] | string; + +export interface ChatCompletionInputMessageChunk { + image_url?: ChatCompletionInputURL; + text?: string; + type: ChatCompletionInputMessageChunkType; [property: string]: unknown; } -export interface ChatCompletionInputFunctionDefinition { - arguments: unknown; - description?: string; - name: string; +export interface ChatCompletionInputURL { + url: string; + [property: string]: unknown; +} + +export type ChatCompletionInputMessageChunkType = "text" | "image_url"; + +export interface ChatCompletionInputGrammarType { + type: ChatCompletionInputGrammarTypeType; + /** + * A string that represents a [JSON Schema](https://json-schema.org/). + * + * JSON Schema is a declarative language that allows to annotate JSON documents + * with types and descriptions. 
+ */ + value: unknown; [property: string]: unknown; } -export type ChatCompletionInputToolType = "OneOf" | ChatCompletionInputToolTypeObject; +export type ChatCompletionInputGrammarTypeType = "json" | "regex"; + +export type ChatCompletionInputTool = ChatCompletionInputToolType | string; + +export interface ChatCompletionInputToolType { + function?: ChatCompletionInputFunctionName; + [property: string]: unknown; +} -export interface ChatCompletionInputToolTypeObject { - FunctionName: string; +export interface ChatCompletionInputFunctionName { + name: string; [property: string]: unknown; } -export interface ChatCompletionInputTool { +export interface ToolElement { function: ChatCompletionInputFunctionDefinition; type: string; [property: string]: unknown; } +export interface ChatCompletionInputFunctionDefinition { + arguments: unknown; + description?: string; + name: string; + [property: string]: unknown; +} + /** * Chat Completion Output. * @@ -151,7 +179,6 @@ export interface ChatCompletionOutput { created: number; id: string; model: string; - object: string; system_fingerprint: string; usage: ChatCompletionOutputUsage; [property: string]: unknown; @@ -161,7 +188,7 @@ export interface ChatCompletionOutputComplete { finish_reason: string; index: number; logprobs?: ChatCompletionOutputLogprobs; - message: ChatCompletionOutputMessage; + message: ChatCompletionOutputOutputMessage; [property: string]: unknown; } @@ -183,9 +210,8 @@ export interface ChatCompletionOutputTopLogprob { [property: string]: unknown; } -export interface ChatCompletionOutputMessage { +export interface ChatCompletionOutputOutputMessage { content?: string; - name?: string; role: string; tool_calls?: ChatCompletionOutputToolCall[]; [property: string]: unknown; @@ -193,7 +219,7 @@ export interface ChatCompletionOutputMessage { export interface ChatCompletionOutputToolCall { function: ChatCompletionOutputFunctionDefinition; - id: number; + id: string; type: string; [property: string]: unknown; } @@ 
-224,7 +250,6 @@ export interface ChatCompletionStreamOutput { created: number; id: string; model: string; - object: string; system_fingerprint: string; [property: string]: unknown; } diff --git a/packages/tasks/src/tasks/chat-completion/spec/input.json b/packages/tasks/src/tasks/chat-completion/spec/input.json index 0b549cd58..fe99a9a2a 100644 --- a/packages/tasks/src/tasks/chat-completion/spec/input.json +++ b/packages/tasks/src/tasks/chat-completion/spec/input.json @@ -4,7 +4,7 @@ "description": "Chat Completion Input.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.", "title": "ChatCompletionInput", "type": "object", - "required": ["model", "messages"], + "required": ["messages"], "properties": { "frequency_penalty": { "type": "number", @@ -47,7 +47,8 @@ "model": { "type": "string", "description": "[UNUSED] ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.", - "example": "mistralai/Mistral-7B-Instruct-v0.2" + "example": "mistralai/Mistral-7B-Instruct-v0.2", + "nullable": true }, "n": { "type": "integer", @@ -64,6 +65,15 @@ "example": 0.1, "nullable": true }, + "response_format": { + "allOf": [ + { + "$ref": "#/$defs/ChatCompletionInputGrammarType" + } + ], + "default": "null", + "nullable": true + }, "seed": { "type": "integer", "format": "int64", @@ -93,7 +103,7 @@ "tool_choice": { "allOf": [ { - "$ref": "#/$defs/ChatCompletionInputToolType" + "$ref": "#/$defs/ChatCompletionInputToolChoice" } ], "nullable": true @@ -132,12 +142,10 @@ "$defs": { "ChatCompletionInputMessage": { "type": "object", - "required": ["role"], + "required": ["role", "content"], "properties": { "content": { - "type": "string", - "example": "My name is David and I", - "nullable": true + "$ref": "#/$defs/ChatCompletionInputMessageContent" }, "name": { "type": "string", @@ -147,68 +155,148 @@ "role": { 
"type": "string", "example": "user" + } + }, + "title": "ChatCompletionInputMessage" + }, + "ChatCompletionInputMessageContent": { + "oneOf": [ + { + "type": "string" }, - "tool_calls": { + { "type": "array", "items": { - "$ref": "#/$defs/ChatCompletionInputToolCall" - }, - "nullable": true + "$ref": "#/$defs/ChatCompletionInputMessageChunk" + } + } + ], + "title": "ChatCompletionInputMessageContent" + }, + "ChatCompletionInputMessageChunk": { + "oneOf": [ + { + "type": "object", + "required": ["text", "type"], + "properties": { + "text": { + "type": "string" + }, + "type": { + "type": "string", + "enum": ["text"] + } + } + }, + { + "type": "object", + "required": ["image_url", "type"], + "properties": { + "image_url": { + "$ref": "#/$defs/ChatCompletionInputUrl" + }, + "type": { + "type": "string", + "enum": ["image_url"] + } + } } + ], + "discriminator": { + "propertyName": "type" }, - "title": "ChatCompletionInputMessage" + "title": "ChatCompletionInputMessageChunk" }, - "ChatCompletionInputToolCall": { + "ChatCompletionInputUrl": { "type": "object", - "required": ["id", "type", "function"], + "required": ["url"], "properties": { - "function": { - "$ref": "#/$defs/ChatCompletionInputFunctionDefinition" - }, - "id": { - "type": "integer", - "format": "int32", - "minimum": 0 - }, - "type": { + "url": { "type": "string" } }, - "title": "ChatCompletionInputToolCall" + "title": "ChatCompletionInputUrl" }, - "ChatCompletionInputFunctionDefinition": { - "type": "object", - "required": ["name", "arguments"], - "properties": { - "arguments": {}, - "description": { - "type": "string", - "nullable": true + "ChatCompletionInputGrammarType": { + "oneOf": [ + { + "type": "object", + "required": ["type", "value"], + "properties": { + "type": { + "type": "string", + "enum": ["json"] + }, + "value": { + "description": "A string that represents a [JSON Schema](https://json-schema.org/).\n\nJSON Schema is a declarative language that allows to annotate JSON documents\nwith types 
and descriptions." + } + } }, - "name": { - "type": "string" + { + "type": "object", + "required": ["type", "value"], + "properties": { + "type": { + "type": "string", + "enum": ["regex"] + }, + "value": { + "type": "string" + } + } } + ], + "discriminator": { + "propertyName": "type" }, - "title": "ChatCompletionInputFunctionDefinition" + "title": "ChatCompletionInputGrammarType" + }, + "ChatCompletionInputToolChoice": { + "allOf": [ + { + "$ref": "#/$defs/ChatCompletionInputToolType" + } + ], + "nullable": true, + "title": "ChatCompletionInputToolChoice" }, "ChatCompletionInputToolType": { "oneOf": [ { "type": "object", - "required": ["FunctionName"], + "default": null, + "nullable": true + }, + { + "type": "string" + }, + { + "type": "object", + "required": ["function"], "properties": { - "FunctionName": { - "type": "string" + "function": { + "$ref": "#/$defs/ChatCompletionInputFunctionName" } } }, { - "type": "string", - "enum": ["OneOf"] + "type": "object", + "default": null, + "nullable": true } ], "title": "ChatCompletionInputToolType" }, + "ChatCompletionInputFunctionName": { + "type": "object", + "required": ["name"], + "properties": { + "name": { + "type": "string" + } + }, + "title": "ChatCompletionInputFunctionName" + }, "ChatCompletionInputTool": { "type": "object", "required": ["type", "function"], @@ -222,6 +310,21 @@ } }, "title": "ChatCompletionInputTool" + }, + "ChatCompletionInputFunctionDefinition": { + "type": "object", + "required": ["name", "arguments"], + "properties": { + "arguments": {}, + "description": { + "type": "string", + "nullable": true + }, + "name": { + "type": "string" + } + }, + "title": "ChatCompletionInputFunctionDefinition" } } } diff --git a/packages/tasks/src/tasks/chat-completion/spec/output.json b/packages/tasks/src/tasks/chat-completion/spec/output.json index 5b602ccd6..a3ca487a3 100644 --- a/packages/tasks/src/tasks/chat-completion/spec/output.json +++ b/packages/tasks/src/tasks/chat-completion/spec/output.json @@ -4,7 
+4,7 @@ "description": "Chat Completion Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.", "title": "ChatCompletionOutput", "type": "object", - "required": ["id", "object", "created", "model", "system_fingerprint", "choices", "usage"], + "required": ["id", "created", "model", "system_fingerprint", "choices", "usage"], "properties": { "choices": { "type": "array", @@ -25,9 +25,6 @@ "type": "string", "example": "mistralai/Mistral-7B-Instruct-v0.2" }, - "object": { - "type": "string" - }, "system_fingerprint": { "type": "string" }, @@ -57,7 +54,7 @@ "nullable": true }, "message": { - "$ref": "#/$defs/ChatCompletionOutputMessage" + "$ref": "#/$defs/ChatCompletionOutputOutputMessage" } }, "title": "ChatCompletionOutputComplete" @@ -109,33 +106,48 @@ }, "title": "ChatCompletionOutputTopLogprob" }, - "ChatCompletionOutputMessage": { + "ChatCompletionOutputOutputMessage": { + "oneOf": [ + { + "$ref": "#/$defs/ChatCompletionOutputTextMessage" + }, + { + "$ref": "#/$defs/ChatCompletionOutputToolCallMessage" + } + ], + "title": "ChatCompletionOutputOutputMessage" + }, + "ChatCompletionOutputTextMessage": { "type": "object", - "required": ["role"], + "required": ["role", "content"], "properties": { "content": { "type": "string", - "example": "My name is David and I", - "nullable": true - }, - "name": { - "type": "string", - "example": "\"David\"", - "nullable": true + "example": "My name is David and I" }, "role": { "type": "string", "example": "user" + } + }, + "title": "ChatCompletionOutputTextMessage" + }, + "ChatCompletionOutputToolCallMessage": { + "type": "object", + "required": ["role", "tool_calls"], + "properties": { + "role": { + "type": "string", + "example": "assistant" }, "tool_calls": { "type": "array", "items": { "$ref": "#/$defs/ChatCompletionOutputToolCall" - }, - "nullable": true + } } }, - "title": "ChatCompletionOutputMessage" + 
"title": "ChatCompletionOutputToolCallMessage" }, "ChatCompletionOutputToolCall": { "type": "object", @@ -145,9 +157,7 @@ "$ref": "#/$defs/ChatCompletionOutputFunctionDefinition" }, "id": { - "type": "integer", - "format": "int32", - "minimum": 0 + "type": "string" }, "type": { "type": "string" diff --git a/packages/tasks/src/tasks/chat-completion/spec/stream_output.json b/packages/tasks/src/tasks/chat-completion/spec/stream_output.json index 72575d913..08e1804a8 100644 --- a/packages/tasks/src/tasks/chat-completion/spec/stream_output.json +++ b/packages/tasks/src/tasks/chat-completion/spec/stream_output.json @@ -4,7 +4,7 @@ "description": "Chat Completion Stream Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.", "title": "ChatCompletionStreamOutput", "type": "object", - "required": ["id", "object", "created", "model", "system_fingerprint", "choices"], + "required": ["id", "created", "model", "system_fingerprint", "choices"], "properties": { "choices": { "type": "array", @@ -25,9 +25,6 @@ "type": "string", "example": "mistralai/Mistral-7B-Instruct-v0.2" }, - "object": { - "type": "string" - }, "system_fingerprint": { "type": "string" } @@ -61,28 +58,44 @@ "title": "ChatCompletionStreamOutputChoice" }, "ChatCompletionStreamOutputDelta": { + "oneOf": [ + { + "$ref": "#/$defs/ChatCompletionStreamOutputTextMessage" + }, + { + "$ref": "#/$defs/ChatCompletionStreamOutputToolCallDelta" + } + ], + "title": "ChatCompletionStreamOutputDelta" + }, + "ChatCompletionStreamOutputTextMessage": { "type": "object", - "required": ["role"], + "required": ["role", "content"], "properties": { "content": { "type": "string", - "example": "What is Deep Learning?", - "nullable": true + "example": "My name is David and I" }, "role": { "type": "string", "example": "user" + } + }, + "title": "ChatCompletionStreamOutputTextMessage" + }, + 
"ChatCompletionStreamOutputToolCallDelta": { + "type": "object", + "required": ["role", "tool_calls"], + "properties": { + "role": { + "type": "string", + "example": "assistant" }, "tool_calls": { - "allOf": [ - { - "$ref": "#/$defs/ChatCompletionStreamOutputDeltaToolCall" - } - ], - "nullable": true + "$ref": "#/$defs/ChatCompletionStreamOutputDeltaToolCall" } }, - "title": "ChatCompletionStreamOutputDelta" + "title": "ChatCompletionStreamOutputToolCallDelta" }, "ChatCompletionStreamOutputDeltaToolCall": { "type": "object", diff --git a/packages/tasks/src/tasks/feature-extraction/inference.ts b/packages/tasks/src/tasks/feature-extraction/inference.ts index 96194d7e9..404b10308 100644 --- a/packages/tasks/src/tasks/feature-extraction/inference.ts +++ b/packages/tasks/src/tasks/feature-extraction/inference.ts @@ -23,7 +23,7 @@ export interface FeatureExtractionInput { * The name of the prompt that should be used by for encoding. If not set, no prompt * will be applied. * - * Must be a key in the `Sentence Transformers` configuration `prompts` dictionary. + * Must be a key in the `sentence-transformers` configuration `prompts` dictionary. * * For example if ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ", * ...}, diff --git a/packages/tasks/src/tasks/feature-extraction/spec/input.json b/packages/tasks/src/tasks/feature-extraction/spec/input.json index 94e8d7a0b..a4fec711a 100644 --- a/packages/tasks/src/tasks/feature-extraction/spec/input.json +++ b/packages/tasks/src/tasks/feature-extraction/spec/input.json @@ -17,7 +17,7 @@ }, "prompt_name": { "type": "string", - "description": "The name of the prompt that should be used by for encoding. 
If not set, no prompt\nwill be applied.\n\nMust be a key in the `Sentence Transformers` configuration `prompts` dictionary.\n\nFor example if ``prompt_name`` is \"query\" and the ``prompts`` is {\"query\": \"query: \", ...},\nthen the sentence \"What is the capital of France?\" will be encoded as\n\"query: What is the capital of France?\" because the prompt text will be prepended before\nany text to encode.", + "description": "The name of the prompt that should be used by for encoding. If not set, no prompt\nwill be applied.\n\nMust be a key in the `sentence-transformers` configuration `prompts` dictionary.\n\nFor example if ``prompt_name`` is \"query\" and the ``prompts`` is {\"query\": \"query: \", ...},\nthen the sentence \"What is the capital of France?\" will be encoded as\n\"query: What is the capital of France?\" because the prompt text will be prepended before\nany text to encode.", "default": "null", "example": "null", "nullable": true diff --git a/packages/tasks/src/tasks/text-generation/inference.ts b/packages/tasks/src/tasks/text-generation/inference.ts index 37395c580..c906e6fcb 100644 --- a/packages/tasks/src/tasks/text-generation/inference.ts +++ b/packages/tasks/src/tasks/text-generation/inference.ts @@ -19,23 +19,84 @@ export interface TextGenerationInput { } export interface TextGenerationInputGenerateParameters { + /** + * Lora adapter id + */ + adapter_id?: string; + /** + * Generate best_of sequences and return the one with the highest token logprobs. + */ best_of?: number; + /** + * Whether to return decoder input token logprobs and ids. + */ decoder_input_details?: boolean; + /** + * Whether to return generation details. + */ details?: boolean; + /** + * Activate logits sampling. + */ do_sample?: boolean; + /** + * The parameter for frequency penalty. 1.0 means no penalty + * Penalize new tokens based on their existing frequency in the text so far, + * decreasing the model's likelihood to repeat the same line verbatim. 
+ */ frequency_penalty?: number; grammar?: TextGenerationInputGrammarType; + /** + * Maximum number of tokens to generate. + */ max_new_tokens?: number; + /** + * The parameter for repetition penalty. 1.0 means no penalty. + * See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. + */ repetition_penalty?: number; + /** + * Whether to prepend the prompt to the generated text + */ return_full_text?: boolean; + /** + * Random sampling seed. + */ seed?: number; + /** + * Stop generating tokens if a member of `stop` is generated. + */ stop?: string[]; + /** + * The value used to modulate the logits distribution. + */ temperature?: number; + /** + * The number of highest probability vocabulary tokens to keep for top-k-filtering. + */ top_k?: number; + /** + * The number of highest probability vocabulary tokens to keep for top-n-filtering. + */ top_n_tokens?: number; + /** + * Top-p value for nucleus sampling. + */ top_p?: number; + /** + * Truncate input tokens to the given size. + */ truncate?: number; + /** + * Typical Decoding mass + * See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) + * for more information. + */ typical_p?: number; + /** + * Watermarking with [A Watermark for Large Language + * Models](https://arxiv.org/abs/2301.10226). 
+ */ watermark?: boolean; [property: string]: unknown; } diff --git a/packages/tasks/src/tasks/text-generation/spec/input.json b/packages/tasks/src/tasks/text-generation/spec/input.json index 0742cefe0..108d9fb3c 100644 --- a/packages/tasks/src/tasks/text-generation/spec/input.json +++ b/packages/tasks/src/tasks/text-generation/spec/input.json @@ -22,8 +22,16 @@ "TextGenerationInputGenerateParameters": { "type": "object", "properties": { + "adapter_id": { + "type": "string", + "description": "Lora adapter id", + "default": "null", + "example": "null", + "nullable": true + }, "best_of": { "type": "integer", + "description": "Generate best_of sequences and return the one with the highest token logprobs.", "default": "null", "example": 1, "nullable": true, @@ -32,20 +40,24 @@ }, "decoder_input_details": { "type": "boolean", + "description": "Whether to return decoder input token logprobs and ids.", "default": "false" }, "details": { "type": "boolean", + "description": "Whether to return generation details.", "default": "true" }, "do_sample": { "type": "boolean", + "description": "Activate logits sampling.", "default": "false", "example": true }, "frequency_penalty": { "type": "number", "format": "float", + "description": "The parameter for frequency penalty. 1.0 means no penalty\nPenalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.", "default": "null", "example": 0.1, "nullable": true, @@ -63,6 +75,7 @@ "max_new_tokens": { "type": "integer", "format": "int32", + "description": "Maximum number of tokens to generate.", "default": "100", "example": "20", "nullable": true, @@ -71,6 +84,7 @@ "repetition_penalty": { "type": "number", "format": "float", + "description": "The parameter for repetition penalty. 
1.0 means no penalty.\nSee [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.", "default": "null", "example": 1.03, "nullable": true, @@ -78,6 +92,7 @@ }, "return_full_text": { "type": "boolean", + "description": "Whether to prepend the prompt to the generated text", "default": "null", "example": false, "nullable": true @@ -85,6 +100,7 @@ "seed": { "type": "integer", "format": "int64", + "description": "Random sampling seed.", "default": "null", "example": "null", "nullable": true, @@ -96,12 +112,14 @@ "items": { "type": "string" }, + "description": "Stop generating tokens if a member of `stop` is generated.", "example": ["photographer"], "maxItems": 4 }, "temperature": { "type": "number", "format": "float", + "description": "The value used to modulate the logits distribution.", "default": "null", "example": 0.5, "nullable": true, @@ -110,6 +128,7 @@ "top_k": { "type": "integer", "format": "int32", + "description": "The number of highest probability vocabulary tokens to keep for top-k-filtering.", "default": "null", "example": 10, "nullable": true, @@ -118,6 +137,7 @@ "top_n_tokens": { "type": "integer", "format": "int32", + "description": "The number of highest probability vocabulary tokens to keep for top-n-filtering.", "default": "null", "example": 5, "nullable": true, @@ -127,6 +147,7 @@ "top_p": { "type": "number", "format": "float", + "description": "Top-p value for nucleus sampling.", "default": "null", "example": 0.95, "nullable": true, @@ -135,6 +156,7 @@ }, "truncate": { "type": "integer", + "description": "Truncate input tokens to the given size.", "default": "null", "example": "null", "nullable": true, @@ -143,6 +165,7 @@ "typical_p": { "type": "number", "format": "float", + "description": "Typical Decoding mass\nSee [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information.", "default": "null", "example": 0.95, "nullable": true, @@ -151,6 +174,7 @@ }, "watermark": { "type": "boolean", + 
"description": "Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226).", "default": "false", "example": true }