From 1b94f5061baf5b4aa5f8bdd4741c492241820dff Mon Sep 17 00:00:00 2001 From: Chono N <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 20 Jul 2024 19:45:11 +0200 Subject: [PATCH 01/33] Update .gitignore --- .gitignore | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index bbfd2cb..c622c7e 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,32 @@ # Dependency directories (remove the comment below to include it) # vendor/ -.idea \ No newline at end of file +.idea + +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk From 636f1e3ac699baaedf858071ad5f897cfb53f7fe Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:18:29 +0200 Subject: [PATCH 02/33] - Added support for Azure AI Studio. - Added base for serverless studio deployments. - Added dotenv. - Add sup and override mode. --- Dockerfile | 2 + main.go | 39 ++++++++++++++++ pkg/azure/proxy.go | 64 +++++++++++++++----------- pkg/azure/types.go | 110 +++++++++++++++++++++++++++------------------ 4 files changed, 146 insertions(+), 69 deletions(-) diff --git a/Dockerfile b/Dockerfile index adf39d5..886665e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,11 @@ FROM golang:1.22.5 AS builder WORKDIR /build COPY . . +RUN go get github.com/joho/godotenv RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o azure-oai-proxy . 
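# Note: CGO_ENABLED=0 produces a statically linked binary, which is what lets
# the runtime stage below use a distroless base image (no shell, no libc).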
FROM gcr.io/distroless/base-debian12 COPY --from=builder /build/azure-oai-proxy / +COPY --from=builder /build/.env /.env EXPOSE 11437 ENTRYPOINT ["/azure-oai-proxy"] \ No newline at end of file diff --git a/main.go b/main.go index 28cfe4a..2fbfaed 100644 --- a/main.go +++ b/main.go @@ -7,10 +7,12 @@ import ( "log" "net/http" "os" + "strings" "github.com/gin-gonic/gin" "github.com/gyarbij/azure-oai-proxy/pkg/azure" "github.com/gyarbij/azure-oai-proxy/pkg/openai" + "github.com/joho/godotenv" ) var ( @@ -60,6 +62,43 @@ func init() { log.Printf("loading azure openai proxy mode: %s", ProxyMode) } +func init() { + // Load .env file if it exists + if err := godotenv.Load(); err != nil { + log.Println("No .env file found") + } + + gin.SetMode(gin.ReleaseMode) + if v := os.Getenv("AZURE_OPENAI_PROXY_ADDRESS"); v != "" { + Address = v + } + if v := os.Getenv("AZURE_OPENAI_PROXY_MODE"); v != "" { + ProxyMode = v + } + log.Printf("loading azure openai proxy address: %s", Address) + log.Printf("loading azure openai proxy mode: %s", ProxyMode) + + // Load Azure OpenAI Model Mapper + if v := os.Getenv("AZURE_OPENAI_MODEL_MAPPER"); v != "" { + for _, pair := range strings.Split(v, ",") { + info := strings.Split(pair, "=") + if len(info) == 2 { + azure.AzureOpenAIModelMapper[info[0]] = info[1] + } + } + } + + // Load Azure AI Studio Deployments + if v := os.Getenv("AZURE_AI_STUDIO_DEPLOYMENTS"); v != "" { + for _, pair := range strings.Split(v, ",") { + info := strings.Split(pair, "=") + if len(info) == 2 { + azure.AzureAIStudioDeployments[info[0]] = info[1] + } + } + } +} + func main() { router := gin.Default() if ProxyMode == "azure" { diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 7bcbbae..bcd1be0 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -48,7 +48,8 @@ var ( "text-embedding-3-small": "text-embedding-3-small-1", "text-embedding-3-large": "text-embedding-3-large-1", } - fallbackModelMapper = regexp.MustCompile(`[.:]`) + AzureAIStudioDeployments = make(map[string]string) + fallbackModelMapper = regexp.MustCompile(`[.:]`) ) func init() { @@ -58,14 +59,17 @@ func init() { if v := os.Getenv("AZURE_OPENAI_ENDPOINT"); v != "" { AzureOpenAIEndpoint = v } - if v := os.Getenv("AZURE_OPENAI_MODEL_MAPPER"); v != "" { + + handleModelMapper() + + if v := os.Getenv("AZURE_AI_STUDIO_DEPLOYMENTS"); v != "" { for _, pair := range strings.Split(v, ",") { info := strings.Split(pair, "=") - if len(info) != 2 { - log.Printf("error parsing AZURE_OPENAI_MODEL_MAPPER, invalid value %s", pair) - os.Exit(1) + if len(info) == 2 { + AzureAIStudioDeployments[info[0]] = info[1] + } else { + log.Printf("error parsing AZURE_AI_STUDIO_DEPLOYMENTS, invalid value %s", pair) } - AzureOpenAIModelMapper[info[0]] = info[1] } } if v := os.Getenv("AZURE_OPENAI_TOKEN"); v != "" { @@ -76,7 +80,35 @@ func init() { log.Printf("loading azure api endpoint: %s", AzureOpenAIEndpoint) log.Printf("loading azure api version: %s", AzureOpenAIAPIVersion) for k, v := range AzureOpenAIModelMapper { - log.Printf("loading azure model mapper: %s -> %s", k, v) + log.Printf("final azure model mapper: %s -> %s", k, v) + } + for k, v := range AzureAIStudioDeployments { + log.Printf("loading azure ai studio deployment: %s -> %s", k, v) + } +} + +func handleModelMapper() { + overrideMode := strings.ToLower(os.Getenv("AZURE_OPENAI_MODEL_MAPPER_MODE")) == "override" + + if v := os.Getenv("AZURE_OPENAI_MODEL_MAPPER"); v != "" { + for _, pair := range strings.Split(v, ",") { + info := strings.Split(pair, "=") + if len(info) == 2 { + if 
overrideMode { + AzureOpenAIModelMapper[info[0]] = info[1] + log.Printf("Overriding model mapping: %s -> %s", info[0], info[1]) + } else { + if _, exists := AzureOpenAIModelMapper[info[0]]; !exists { + AzureOpenAIModelMapper[info[0]] = info[1] + log.Printf("Adding new model mapping: %s -> %s", info[0], info[1]) + } else { + log.Printf("Skipping existing model mapping: %s", info[0]) + } + } + } else { + log.Printf("error parsing AZURE_OPENAI_MODEL_MAPPER, invalid value %s", pair) + } + } } } @@ -102,7 +134,6 @@ func getModelFromRequest(req *http.Request) string { return gjson.GetBytes(body, "model").String() } -// sanitizeHeaders returns a copy of the headers with sensitive information redacted func sanitizeHeaders(headers http.Header) http.Header { sanitized := make(http.Header) for key, values := range headers { @@ -118,48 +149,36 @@ func sanitizeHeaders(headers http.Header) http.Header { func HandleToken(req *http.Request) { var token string - // Check for API Key in the api-key header if apiKey := req.Header.Get("api-key"); apiKey != "" { token = apiKey } else if authHeader := req.Header.Get("Authorization"); authHeader != "" { - // If not found, check for Authorization header token = strings.TrimPrefix(authHeader, "Bearer ") } else if AzureOpenAIToken != "" { - // If neither is present, use the AzureOpenAIToken if set token = AzureOpenAIToken } else if envApiKey := os.Getenv("AZURE_OPENAI_API_KEY"); envApiKey != "" { - // As a last resort, check for API key in environment variable token = envApiKey } if token != "" { - // Set the api-key header with the found token req.Header.Set("api-key", token) - // Remove the Authorization header to avoid conflicts req.Header.Del("Authorization") } else { log.Println("Warning: No authentication token found") } } -// Update the makeDirector function to handle the new endpoint structure func makeDirector(remote *url.URL) func(*http.Request) { return func(req *http.Request) { - - // Get model and map it to deployment model := getModelFromRequest(req) deployment := GetDeploymentByModel(model) - // Handle token HandleToken(req) - // Set the Host, Scheme, Path, and RawPath of the request originURL := req.URL.String() req.Host = remote.Host req.URL.Scheme = remote.Scheme req.URL.Host = remote.Host - // Handle different endpoints switch { case strings.HasPrefix(req.URL.Path, "/v1/chat/completions"): req.URL.Path = path.Join("/openai/deployments", deployment, "chat/completions") @@ -185,7 +204,6 @@ func makeDirector(remote *url.URL) func(*http.Request) { req.URL.RawPath = req.URL.EscapedPath() - // Add logging for new parameters if req.Body != nil { var requestBody map[string]interface{} bodyBytes, _ := io.ReadAll(req.Body) @@ -198,17 +216,14 @@ func makeDirector(remote *url.URL) func(*http.Request) { } } - // Restore the body to the request req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) } - // Add the api-version query parameter query := req.URL.Query() query.Add("api-version", AzureOpenAIAPIVersion) req.URL.RawQuery = query.Encode() log.Printf("Proxying request [%s] %s -> %s", model, originURL, req.URL.String()) - // log.Printf("Sanitized Request Headers: %v", sanitizeHeaders(req.Header)) } } @@ -219,7 +234,6 @@ func modifyResponse(res *http.Response) error { res.Body = io.NopCloser(bytes.NewBuffer(body)) } - // Handle streaming responses if res.Header.Get("Content-Type") == "text/event-stream" { res.Header.Set("X-Accel-Buffering", "no") } diff --git a/pkg/azure/types.go b/pkg/azure/types.go index 70d68c8..dca0462 100644 --- a/pkg/azure/types.go +++ 
b/pkg/azure/types.go @@ -5,25 +5,43 @@ type ListModelResponse struct { Data []Model `json:"data"` } +// AzureConfig represents the configuration for Azure OpenAI +type AzureConfig struct { + APIVersion string `json:"api_version"` + Endpoint string `json:"endpoint"` + Token string `json:"token"` + ModelMapperMode string `json:"model_mapper_mode"` + AIStudioDeploymentsRaw string `json:"ai_studio_deployments_raw"` +} + +// AzureAIStudioDeployment represents a deployment in Azure AI Studio +type AzureAIStudioDeployment struct { + ModelName string `json:"model_name"` + DeploymentName string `json:"deployment_name"` + Region string `json:"region"` +} + +// Update Model struct to include new fields if necessary type Model struct { - ID string `json:"id"` - Object string `json:"object"` - CreatedAt int64 `json:"created_at"` - Capabilities Capabilities `json:"capabilities"` - LifecycleStatus string `json:"lifecycle_status"` - Status string `json:"status"` - Deprecation Deprecation `json:"deprecation,omitempty"` - FineTune string `json:"fine_tune,omitempty"` - Created int `json:"created"` - OwnedBy string `json:"owned_by"` - Permission []ModelPermission `json:"permission"` - Root string `json:"root"` - Parent any `json:"parent"` + ID string `json:"id"` + Object string `json:"object"` + CreatedAt int64 `json:"created_at"` + Capabilities Capabilities `json:"capabilities"` + LifecycleStatus string `json:"lifecycle_status"` + Status string `json:"status"` + Deprecation Deprecation `json:"deprecation,omitempty"` + FineTune string `json:"fine_tune,omitempty"` + Created int `json:"created"` + OwnedBy string `json:"owned_by"` + Permission []ModelPermission `json:"permission"` + Root string `json:"root"` + Parent any `json:"parent"` + // Add any new fields from the latest Azure OpenAI API here } type ModelList struct { - Object string `json:"object"` - Data []Model `json:"data"` + Object string `json:"object"` + Data []Model `json:"data"` } type ModelPermission struct { @@ -59,10 +77,10 @@ type ListDeployedModelsResponse struct { // JSONModeRequest represents a request with JSON mode enabled type JSONModeRequest struct { - Model string `json:"model"` - Messages []ChatMessage `json:"messages"` - Temperature float64 `json:"temperature,omitempty"` - MaxTokens int `json:"max_tokens,omitempty"` + Model string `json:"model"` + Messages []ChatMessage `json:"messages"` + Temperature float64 `json:"temperature,omitempty"` + MaxTokens int `json:"max_tokens,omitempty"` ResponseFormat *ResponseFormat `json:"response_format,omitempty"` } @@ -79,12 +97,12 @@ type ChatMessage struct { // JSONModeResponse represents a response when JSON mode is enabled type JSONModeResponse struct { - ID string `json:"id"` - Object string `json:"object"` - Created int64 `json:"created"` - Model string `json:"model"` + ID string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + Model string `json:"model"` Choices []JSONChoice `json:"choices"` - Usage Usage `json:"usage"` + Usage Usage `json:"usage"` } // JSONChoice represents a choice in the JSON mode response @@ -109,15 +127,15 @@ type DeploymentCapability struct { // DeploymentDetails represents detailed information about a deployment type DeploymentDetails struct { - ID string `json:"id"` - ModelID string `json:"model"` - OwnerID string `json:"owner"` - Status string `json:"status"` - CreatedAt string `json:"created_at"` - UpdatedAt string `json:"updated_at"` - Capabilities []DeploymentCapability `json:"capabilities"` - ScaleSettings ScaleSettings 
`json:"scale_settings"` - RaiPolicy string `json:"rai_policy"` + ID string `json:"id"` + ModelID string `json:"model"` + OwnerID string `json:"owner"` + Status string `json:"status"` + CreatedAt string `json:"created_at"` + UpdatedAt string `json:"updated_at"` + Capabilities []DeploymentCapability `json:"capabilities"` + ScaleSettings ScaleSettings `json:"scale_settings"` + RaiPolicy string `json:"rai_policy"` } // ScaleSettings represents the scale settings for a deployment @@ -165,8 +183,8 @@ type ImageGenerationRequest struct { // ImageGenerationResponse represents the response structure from image generation type ImageGenerationResponse struct { - Created int64 `json:"created"` - Data []ImageData `json:"data"` + Created int64 `json:"created"` + Data []ImageData `json:"data"` } // ImageData represents data of a generated image @@ -176,8 +194,8 @@ type ImageData struct { // AudioTranscriptionRequest represents the request structure for audio transcription type AudioTranscriptionRequest struct { - Model string `json:"model"` - Audio []byte `json:"audio"` + Model string `json:"model"` + Audio []byte `json:"audio"` Language string `json:"language,omitempty"` } @@ -186,15 +204,19 @@ type AudioTranscriptionResponse struct { Text string `json:"text"` } +// Update if there are any changes to the capabilities type Capabilities struct { - ChatCompletion bool `json:"chat_completion"` - Completion bool `json:"completion"` - Embeddings bool `json:"embeddings"` - FineTune bool `json:"fine_tune"` - Inference bool `json:"inference"` + ChatCompletion bool `json:"chat_completion"` + Completion bool `json:"completion"` + Embeddings bool `json:"embeddings"` + FineTune bool `json:"fine_tune"` + Inference bool `json:"inference"` + // Add any new capabilities here } +// Update if there are any changes to the deprecation structure type Deprecation struct { - FineTune int `json:"fine_tune,omitempty"` - Inference int `json:"inference,omitempty"` + FineTune int `json:"fine_tune,omitempty"` + Inference int `json:"inference,omitempty"` + // Add any new deprecation fields here } From 677cac00347a344ee0b3951560319fba364c28f3 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:29:55 +0200 Subject: [PATCH 03/33] chore: Update Dockerfile to handle missing .env file during build --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 886665e..e4bf35d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,6 @@ RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o azure-oai-proxy . FROM gcr.io/distroless/base-debian12 COPY --from=builder /build/azure-oai-proxy / -COPY --from=builder /build/.env /.env +COPY --from=builder /build/.env /.env* 2>/dev/null || touch /.env EXPOSE 11437 ENTRYPOINT ["/azure-oai-proxy"] \ No newline at end of file From f33da9ef7da017e9ea57f400706dec8bd279d899 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:38:39 +0200 Subject: [PATCH 04/33] chore: Update Dockerfile to handle missing .env file during build --- Dockerfile | 8 +++++--- example.env | 7 +++++++ 2 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 example.env diff --git a/Dockerfile b/Dockerfile index e4bf35d..f938b16 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,13 @@ -FROM golang:1.22.5 AS builder +FROM golang:1.18 AS builder WORKDIR /build COPY . . 
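# Note: the app treats .env as optional at runtime; godotenv.Load() (wired in
# by patch 02) merely logs "No .env file found" when it is absent, so the
# image does not strictly need to ship one.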
-RUN go get github.com/joho/godotenv RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o azure-oai-proxy . +RUN echo "#!/bin/sh\ncp /build/.env /app/.env 2>/dev/null || touch /app/.env" > /handle-env.sh +RUN chmod +x /handle-env.sh + FROM gcr.io/distroless/base-debian12 COPY --from=builder /build/azure-oai-proxy / -COPY --from=builder /build/.env /.env* 2>/dev/null || touch /.env +COPY --from=builder /app/.env /.env EXPOSE 11437 ENTRYPOINT ["/azure-oai-proxy"] \ No newline at end of file diff --git a/example.env b/example.env new file mode 100644 index 0000000..fa9e655 --- /dev/null +++ b/example.env @@ -0,0 +1,7 @@ +AZURE_OPENAI_APIVERSION=2024-06-01 +AZURE_OPENAI_ENDPOINT=https://your-azure-openai-resource.openai.azure.com/ +AZURE_OPENAI_API_KEY=your-azure-openai-api-key +AZURE_OPENAI_MODEL_MAPPER=gpt-3.5-turbo=gpt-35-turbo,gpt-4=gpt-4-0613 +AZURE_AI_STUDIO_DEPLOYMENTS=mistral-large=Mistral-large2:swedencentral,llama-3=Meta-Llama-31-405B-Instruct:northcentralus +AZURE_OPENAI_PROXY_ADDRESS=0.0.0.0:11437 +AZURE_OPENAI_PROXY_MODE=azure \ No newline at end of file From 2167f204bbed11457a8f8721379e017e07c64d68 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:42:40 +0200 Subject: [PATCH 05/33] chore: Update Dockerfile to use golang 1.22.5 and handle missing .env file during build --- Dockerfile | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index f938b16..5f97aea 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,11 @@ -FROM golang:1.18 AS builder +FROM golang:1.22.5 AS builder WORKDIR /build COPY . . +RUN go get github.com/joho/godotenv RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o azure-oai-proxy . -RUN echo "#!/bin/sh\ncp /build/.env /app/.env 2>/dev/null || touch /app/.env" > /handle-env.sh -RUN chmod +x /handle-env.sh - FROM gcr.io/distroless/base-debian12 COPY --from=builder /build/azure-oai-proxy / -COPY --from=builder /app/.env /.env +COPY --from=builder /build/.env / 2>/dev/null || true EXPOSE 11437 ENTRYPOINT ["/azure-oai-proxy"] \ No newline at end of file From 5361d6bc5c8fc1dd208043548f6347ed4cb20e9f Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:45:58 +0200 Subject: [PATCH 06/33] chore: Update Dockerfile to handle missing .env file during build --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 5f97aea..6d13ebf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,6 @@ RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o azure-oai-proxy . FROM gcr.io/distroless/base-debian12 COPY --from=builder /build/azure-oai-proxy / -COPY --from=builder /build/.env / 2>/dev/null || true +RUN ["/busybox/sh", "-c", "touch /.env"] EXPOSE 11437 ENTRYPOINT ["/azure-oai-proxy"] \ No newline at end of file From f03d593489b6274dfbff9ad5df0f2f52a4d52586 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:52:54 +0200 Subject: [PATCH 07/33] chore: Update Dockerfile to handle missing .env file during build --- Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 6d13ebf..0d0f2bd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,5 @@ RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o azure-oai-proxy . 
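# Note: gcr.io/distroless/base-debian12 (non-debug) ships no /busybox/sh for
# the RUN below to exec, so the touch workaround is dropped; godotenv treats a
# missing .env as non-fatal anyway.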
FROM gcr.io/distroless/base-debian12 COPY --from=builder /build/azure-oai-proxy / -RUN ["/busybox/sh", "-c", "touch /.env"] EXPOSE 11437 ENTRYPOINT ["/azure-oai-proxy"] \ No newline at end of file From 3b98ce41142f5bc2c6d38edb9453de923e6dd653 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 16:22:55 +0200 Subject: [PATCH 08/33] chore: Initialize ServerlessDeploymentKeys map and handle serverless deployments --- pkg/azure/proxy.go | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index bcd1be0..fafb322 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -49,6 +49,7 @@ var ( "text-embedding-3-large": "text-embedding-3-large-1", } AzureAIStudioDeployments = make(map[string]string) + ServerlessDeploymentKeys = make(map[string]string) fallbackModelMapper = regexp.MustCompile(`[.:]`) ) @@ -85,6 +86,16 @@ func init() { for k, v := range AzureAIStudioDeployments { log.Printf("loading azure ai studio deployment: %s -> %s", k, v) } + + // Initialize ServerlessDeploymentKeys + for key, value := range os.Environ() { + if strings.HasPrefix(key, "AZURE_OPENAI_KEY_") { + deploymentName := strings.TrimPrefix(key, "AZURE_OPENAI_KEY_") + ServerlessDeploymentKeys[strings.ToLower(deploymentName)] = value + } + } + + log.Printf("Loaded %d serverless deployment keys", len(ServerlessDeploymentKeys)) } func handleModelMapper() { @@ -147,6 +158,16 @@ func sanitizeHeaders(headers http.Header) http.Header { } func HandleToken(req *http.Request) { + deployment := extractDeploymentFromPath(req.URL.Path) + + // Check if it's a serverless deployment + if apiKey, ok := ServerlessDeploymentKeys[strings.ToLower(deployment)]; ok { + req.Header.Set("api-key", apiKey) + req.Header.Del("Authorization") + return + } + + // Existing token handling logic var token string if apiKey := req.Header.Get("api-key"); apiKey != "" { @@ -163,8 +184,18 @@ func HandleToken(req *http.Request) { req.Header.Set("api-key", token) req.Header.Del("Authorization") } else { - log.Println("Warning: No authentication token found") + log.Println("Warning: No authentication token found for deployment:", deployment) + } +} + +func extractDeploymentFromPath(path string) string { + parts := strings.Split(path, "/") + for i, part := range parts { + if part == "deployments" && i+1 < len(parts) { + return parts[i+1] + } } + return "" } func makeDirector(remote *url.URL) func(*http.Request) { From e4b6d56904163624aaee165280f73aa38e642d2a Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 16:31:40 +0200 Subject: [PATCH 09/33] chore: Refactor HandleToken function to handle serverless deployments and improve token handling logic --- pkg/azure/proxy.go | 74 ++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index fafb322..fd31deb 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -88,16 +88,51 @@ func init() { } // Initialize ServerlessDeploymentKeys - for key, value := range os.Environ() { - if strings.HasPrefix(key, "AZURE_OPENAI_KEY_") { - deploymentName := strings.TrimPrefix(key, "AZURE_OPENAI_KEY_") - ServerlessDeploymentKeys[strings.ToLower(deploymentName)] = value + for _, env := range os.Environ() { + parts := strings.SplitN(env, "=", 2) + if len(parts) == 2 { + key, value := parts[0], parts[1] + if 
strings.HasPrefix(key, "AZURE_OPENAI_KEY_") { + deploymentName := strings.TrimPrefix(key, "AZURE_OPENAI_KEY_") + ServerlessDeploymentKeys[strings.ToLower(deploymentName)] = value + } } } log.Printf("Loaded %d serverless deployment keys", len(ServerlessDeploymentKeys)) } +func HandleToken(req *http.Request) { + deployment := extractDeploymentFromPath(req.URL.Path) + + // Check if it's a serverless deployment + if apiKey, ok := ServerlessDeploymentKeys[strings.ToLower(deployment)]; ok { + req.Header.Set("api-key", apiKey) + req.Header.Del("Authorization") + return + } + + // Existing token handling logic + var token string + + if apiKey := req.Header.Get("api-key"); apiKey != "" { + token = apiKey + } else if authHeader := req.Header.Get("Authorization"); authHeader != "" { + token = strings.TrimPrefix(authHeader, "Bearer ") + } else if AzureOpenAIToken != "" { + token = AzureOpenAIToken + } else if envApiKey := os.Getenv("AZURE_OPENAI_API_KEY"); envApiKey != "" { + token = envApiKey + } + + if token != "" { + req.Header.Set("api-key", token) + req.Header.Del("Authorization") + } else { + log.Println("Warning: No authentication token found for deployment:", deployment) + } +} + func handleModelMapper() { overrideMode := strings.ToLower(os.Getenv("AZURE_OPENAI_MODEL_MAPPER_MODE")) == "override" @@ -157,37 +192,6 @@ func sanitizeHeaders(headers http.Header) http.Header { return sanitized } -func HandleToken(req *http.Request) { - deployment := extractDeploymentFromPath(req.URL.Path) - - // Check if it's a serverless deployment - if apiKey, ok := ServerlessDeploymentKeys[strings.ToLower(deployment)]; ok { - req.Header.Set("api-key", apiKey) - req.Header.Del("Authorization") - return - } - - // Existing token handling logic - var token string - - if apiKey := req.Header.Get("api-key"); apiKey != "" { - token = apiKey - } else if authHeader := req.Header.Get("Authorization"); authHeader != "" { - token = strings.TrimPrefix(authHeader, "Bearer ") - } else if AzureOpenAIToken != "" { - token = AzureOpenAIToken - } else if envApiKey := os.Getenv("AZURE_OPENAI_API_KEY"); envApiKey != "" { - token = envApiKey - } - - if token != "" { - req.Header.Set("api-key", token) - req.Header.Del("Authorization") - } else { - log.Println("Warning: No authentication token found for deployment:", deployment) - } -} - func extractDeploymentFromPath(path string) string { parts := strings.Split(path, "/") for i, part := range parts { From 4d8a5cd336f5a824bdf3be9da53e96622d5b145b Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 16:56:33 +0200 Subject: [PATCH 10/33] chore: Add serverless deployments to the models list --- main.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/main.go b/main.go index 2fbfaed..2ea0048 100644 --- a/main.go +++ b/main.go @@ -148,6 +148,21 @@ func handleGetModels(c *gin.Context) { c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to fetch deployed models"}) return } + + // Add serverless deployments to the models list + for deploymentName := range azure.ServerlessDeploymentKeys { + models = append(models, Model{ + ID: deploymentName, + Object: "model", + Capabilities: Capabilities{ + Completion: true, + ChatCompletion: true, + }, + LifecycleStatus: "active", + Status: "ready", + }) + } + result := ModelList{ Object: "list", Data: models, From fa089a8f24aafe7a092d88bd7b2778bcffb5ed42 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 
Jul 2024 17:15:11 +0200 Subject: [PATCH 11/33] Added a ServerlessDeploymentInfo map to store information about serverless deployments. Modified the init() function to parse the AZURE_AI_STUDIO_DEPLOYMENTS environment variable and populate both AzureAIStudioDeployments and ServerlessDeploymentInfo. Updated HandleToken() to use the serverless deployment key when appropriate. Modified makeDirector() to construct the correct URL for serverless deployments. --- pkg/azure/proxy.go | 53 ++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index fd31deb..6d416d7 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -3,6 +3,7 @@ package azure import ( "bytes" "encoding/json" + "fmt" "io" "log" "net/http" @@ -49,10 +50,16 @@ var ( "text-embedding-3-large": "text-embedding-3-large-1", } AzureAIStudioDeployments = make(map[string]string) - ServerlessDeploymentKeys = make(map[string]string) + ServerlessDeploymentInfo = make(map[string]ServerlessDeployment) fallbackModelMapper = regexp.MustCompile(`[.:]`) ) +type ServerlessDeployment struct { + Name string + Region string + Key string +} + func init() { if v := os.Getenv("AZURE_OPENAI_APIVERSION"); v != "" { AzureOpenAIAPIVersion = v @@ -67,12 +74,21 @@ func init() { for _, pair := range strings.Split(v, ",") { info := strings.Split(pair, "=") if len(info) == 2 { - AzureAIStudioDeployments[info[0]] = info[1] + deploymentInfo := strings.Split(info[1], ":") + if len(deploymentInfo) == 2 { + AzureAIStudioDeployments[info[0]] = deploymentInfo[0] + ServerlessDeploymentInfo[strings.ToLower(info[0])] = ServerlessDeployment{ + Name: deploymentInfo[0], + Region: deploymentInfo[1], + Key: os.Getenv("AZURE_OPENAI_KEY_" + strings.ToUpper(info[0])), + } + } } else { log.Printf("error parsing AZURE_AI_STUDIO_DEPLOYMENTS, invalid value %s", pair) } } } + if v := os.Getenv("AZURE_OPENAI_TOKEN"); v != "" { AzureOpenAIToken = v log.Printf("loading azure api token from env") @@ -86,28 +102,15 @@ func init() { for k, v := range AzureAIStudioDeployments { log.Printf("loading azure ai studio deployment: %s -> %s", k, v) } - - // Initialize ServerlessDeploymentKeys - for _, env := range os.Environ() { - parts := strings.SplitN(env, "=", 2) - if len(parts) == 2 { - key, value := parts[0], parts[1] - if strings.HasPrefix(key, "AZURE_OPENAI_KEY_") { - deploymentName := strings.TrimPrefix(key, "AZURE_OPENAI_KEY_") - ServerlessDeploymentKeys[strings.ToLower(deploymentName)] = value - } - } - } - - log.Printf("Loaded %d serverless deployment keys", len(ServerlessDeploymentKeys)) + log.Printf("Loaded %d serverless deployment infos", len(ServerlessDeploymentInfo)) } func HandleToken(req *http.Request) { deployment := extractDeploymentFromPath(req.URL.Path) // Check if it's a serverless deployment - if apiKey, ok := ServerlessDeploymentKeys[strings.ToLower(deployment)]; ok { - req.Header.Set("api-key", apiKey) + if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { + req.Header.Set("api-key", info.Key) req.Header.Del("Authorization") return } @@ -210,9 +213,17 @@ func makeDirector(remote *url.URL) func(*http.Request) { HandleToken(req) originURL := req.URL.String() - req.Host = remote.Host - req.URL.Scheme = remote.Scheme - req.URL.Host = remote.Host + + // Check if it's a serverless deployment + if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { + req.URL.Scheme = "https" + req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", 
info.Name, info.Region) + req.Host = req.URL.Host + } else { + req.Host = remote.Host + req.URL.Scheme = remote.Scheme + req.URL.Host = remote.Host + } switch { case strings.HasPrefix(req.URL.Path, "/v1/chat/completions"): From 9233ce36ec732cde909bb94fcf47709f780bbcc4 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 17:20:12 +0200 Subject: [PATCH 12/33] Removed the parsing of AZURE_AI_STUDIO_DEPLOYMENTS from main.go, now handled in azure.init() . Updated handleGetModels function to include serverless deployments: It now iterates over azure.ServerlessDeploymentInfo to add serverless deployments to the list of models. Serverless deployments have base: (Completion, ChatCompletion, and Inference). --- main.go | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/main.go b/main.go index 2ea0048..d685cb6 100644 --- a/main.go +++ b/main.go @@ -50,18 +50,6 @@ type Deprecation struct { Inference int64 `json:"inference"` } -func init() { - gin.SetMode(gin.ReleaseMode) - if v := os.Getenv("AZURE_OPENAI_PROXY_ADDRESS"); v != "" { - Address = v - } - if v := os.Getenv("AZURE_OPENAI_PROXY_MODE"); v != "" { - ProxyMode = v - } - log.Printf("loading azure openai proxy address: %s", Address) - log.Printf("loading azure openai proxy mode: %s", ProxyMode) -} - func init() { // Load .env file if it exists if err := godotenv.Load(); err != nil { @@ -87,16 +75,6 @@ func init() { } } } - - // Load Azure AI Studio Deployments - if v := os.Getenv("AZURE_AI_STUDIO_DEPLOYMENTS"); v != "" { - for _, pair := range strings.Split(v, ",") { - info := strings.Split(pair, "=") - if len(info) == 2 { - azure.AzureAIStudioDeployments[info[0]] = info[1] - } - } - } } func main() { @@ -150,13 +128,14 @@ func handleGetModels(c *gin.Context) { } // Add serverless deployments to the models list - for deploymentName := range azure.ServerlessDeploymentKeys { + for deploymentName, info := range azure.ServerlessDeploymentInfo { models = append(models, Model{ ID: deploymentName, Object: "model", Capabilities: Capabilities{ Completion: true, ChatCompletion: true, + Inference: true, }, LifecycleStatus: "active", Status: "ready", From b42e8ce92a7bca1e0799feab1bfa59a086724720 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 17:23:24 +0200 Subject: [PATCH 13/33] chore: Update go.mod to include github.com/joho/godotenv v1.5.1 --- go.mod | 1 + go.sum | 2 ++ 2 files changed, 3 insertions(+) diff --git a/go.mod b/go.mod index 0ff66a5..682f70a 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.22.4 require ( github.com/gin-gonic/gin v1.10.0 + github.com/joho/godotenv v1.5.1 github.com/tidwall/gjson v1.17.1 ) diff --git a/go.sum b/go.sum index a4ff04c..4c2d16e 100644 --- a/go.sum +++ b/go.sum @@ -28,6 +28,8 @@ github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MG github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= +github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod 
h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= From e3d29f4a03e1fd3c10ebd69c3da361a6d142a6de Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 17:28:14 +0200 Subject: [PATCH 14/33] remove unused import _info --- main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.go b/main.go index d685cb6..8a8ec8b 100644 --- a/main.go +++ b/main.go @@ -128,7 +128,7 @@ func handleGetModels(c *gin.Context) { } // Add serverless deployments to the models list - for deploymentName, info := range azure.ServerlessDeploymentInfo { + for deploymentName := range azure.ServerlessDeploymentInfo { models = append(models, Model{ ID: deploymentName, Object: "model", From 4f5c828fbea663207254eda2929c3e727bef2bf4 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 17:37:41 +0200 Subject: [PATCH 15/33] chore: Refactor HandleToken function to handle serverless auth and improve token handling logic --- pkg/azure/proxy.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 6d416d7..80022fb 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -110,12 +110,12 @@ func HandleToken(req *http.Request) { // Check if it's a serverless deployment if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { - req.Header.Set("api-key", info.Key) - req.Header.Del("Authorization") + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) + req.Header.Del("api-key") return } - // Existing token handling logic + // Existing token handling logic for non-serverless deployments var token string if apiKey := req.Header.Get("api-key"); apiKey != "" { From cb1279cc31f6318bfb15fa6928c1bb0b584f73c6 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 17:55:48 +0200 Subject: [PATCH 16/33] chore: Refactor HandleToken function to handle serverless deployments and improve token handling logic --- pkg/azure/proxy.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 80022fb..94942cc 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -108,14 +108,13 @@ func init() { func HandleToken(req *http.Request) { deployment := extractDeploymentFromPath(req.URL.Path) - // Check if it's a serverless deployment if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) req.Header.Del("api-key") + log.Printf("Using serverless deployment authentication for %s", deployment) return } - // Existing token handling logic for non-serverless deployments var token string if apiKey := req.Header.Get("api-key"); apiKey != "" { @@ -131,8 +130,9 @@ func HandleToken(req *http.Request) { if token != "" { req.Header.Set("api-key", token) req.Header.Del("Authorization") + log.Printf("Using regular Azure OpenAI authentication for %s", deployment) } else { - log.Println("Warning: No authentication token found for deployment:", deployment) + log.Printf("Warning: No authentication token found for deployment: %s", deployment) } } From 7dd79bd628bf3d39082efe94a9bb5531a54676bf Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 18:15:01 +0200 Subject: [PATCH 
17/33] handle serverless deployments url correctly --- pkg/azure/proxy.go | 48 +++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 94942cc..59e5c67 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -219,33 +219,37 @@ func makeDirector(remote *url.URL) func(*http.Request) { req.URL.Scheme = "https" req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", info.Name, info.Region) req.Host = req.URL.Host + + // For serverless, keep the original path with '/v1' prefix + req.URL.Path = req.URL.Path } else { req.Host = remote.Host req.URL.Scheme = remote.Scheme req.URL.Host = remote.Host - } - switch { - case strings.HasPrefix(req.URL.Path, "/v1/chat/completions"): - req.URL.Path = path.Join("/openai/deployments", deployment, "chat/completions") - case strings.HasPrefix(req.URL.Path, "/v1/completions"): - req.URL.Path = path.Join("/openai/deployments", deployment, "completions") - case strings.HasPrefix(req.URL.Path, "/v1/embeddings"): - req.URL.Path = path.Join("/openai/deployments", deployment, "embeddings") - case strings.HasPrefix(req.URL.Path, "/v1/images/generations"): - req.URL.Path = path.Join("/openai/deployments", deployment, "images/generations") - case strings.HasPrefix(req.URL.Path, "/v1/fine_tunes"): - req.URL.Path = path.Join("/openai/deployments", deployment, "fine-tunes") - case strings.HasPrefix(req.URL.Path, "/v1/files"): - req.URL.Path = path.Join("/openai/deployments", deployment, "files") - case strings.HasPrefix(req.URL.Path, "/v1/audio/speech"): - req.URL.Path = path.Join("/openai/deployments", deployment, "audio/speech") - case strings.HasPrefix(req.URL.Path, "/v1/audio/transcriptions"): - req.URL.Path = path.Join("/openai/deployments", deployment, "transcriptions") - case strings.HasPrefix(req.URL.Path, "/v1/audio/translations"): - req.URL.Path = path.Join("/openai/deployments", deployment, "translations") - default: - req.URL.Path = path.Join("/openai/deployments", deployment, strings.TrimPrefix(req.URL.Path, "/v1/")) + // For regular Azure OpenAI, construct the path + switch { + case strings.HasPrefix(req.URL.Path, "/v1/chat/completions"): + req.URL.Path = path.Join("/openai/deployments", deployment, "chat/completions") + case strings.HasPrefix(req.URL.Path, "/v1/completions"): + req.URL.Path = path.Join("/openai/deployments", deployment, "completions") + case strings.HasPrefix(req.URL.Path, "/v1/embeddings"): + req.URL.Path = path.Join("/openai/deployments", deployment, "embeddings") + case strings.HasPrefix(req.URL.Path, "/v1/images/generations"): + req.URL.Path = path.Join("/openai/deployments", deployment, "images/generations") + case strings.HasPrefix(req.URL.Path, "/v1/fine_tunes"): + req.URL.Path = path.Join("/openai/deployments", deployment, "fine-tunes") + case strings.HasPrefix(req.URL.Path, "/v1/files"): + req.URL.Path = path.Join("/openai/deployments", deployment, "files") + case strings.HasPrefix(req.URL.Path, "/v1/audio/speech"): + req.URL.Path = path.Join("/openai/deployments", deployment, "audio/speech") + case strings.HasPrefix(req.URL.Path, "/v1/audio/transcriptions"): + req.URL.Path = path.Join("/openai/deployments", deployment, "transcriptions") + case strings.HasPrefix(req.URL.Path, "/v1/audio/translations"): + req.URL.Path = path.Join("/openai/deployments", deployment, "translations") + default: + req.URL.Path = path.Join("/openai/deployments", deployment, strings.TrimPrefix(req.URL.Path, "/v1/")) + } } req.URL.RawPath = 
req.URL.EscapedPath() From fbde5f450fe93c55a806695acb4cf9bbbf395217 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 18:24:30 +0200 Subject: [PATCH 18/33] introduce a "isServerless" boolean flag to track whether the current request is for a serverless deployment. For serverless deployments: set "isServerless" to true. We keep the original path, including the '/v1' prefix. does not modify the query parameters. For regular Azure OpenAI deployments: Add the api-version query parameter if "isServerless" is false. --- pkg/azure/proxy.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 59e5c67..793130b 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -214,8 +214,10 @@ func makeDirector(remote *url.URL) func(*http.Request) { originURL := req.URL.String() + isServerless := false // Check if it's a serverless deployment if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { + isServerless = true req.URL.Scheme = "https" req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", info.Name, info.Region) req.Host = req.URL.Host @@ -269,9 +271,12 @@ func makeDirector(remote *url.URL) func(*http.Request) { req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) } - query := req.URL.Query() - query.Add("api-version", AzureOpenAIAPIVersion) - req.URL.RawQuery = query.Encode() + // Only add api-version for non-serverless deployments + if !isServerless { + query := req.URL.Query() + query.Add("api-version", AzureOpenAIAPIVersion) + req.URL.RawQuery = query.Encode() + } log.Printf("Proxying request [%s] %s -> %s", model, originURL, req.URL.String()) } From 3d320b8d212d76fb4b72494210685e02294256f9 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 18:38:24 +0200 Subject: [PATCH 19/33] Introduce "isServerless" flag to track serverless deployments and handle URL construction --- pkg/azure/proxy.go | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 793130b..d549ae7 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -214,16 +214,12 @@ func makeDirector(remote *url.URL) func(*http.Request) { originURL := req.URL.String() - isServerless := false - // Check if it's a serverless deployment if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { - isServerless = true req.URL.Scheme = "https" req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", info.Name, info.Region) req.Host = req.URL.Host - // For serverless, keep the original path with '/v1' prefix - req.URL.Path = req.URL.Path + log.Printf("Using serverless deployment for %s", deployment) } else { req.Host = remote.Host req.URL.Scheme = remote.Scheme @@ -252,6 +248,11 @@ func makeDirector(remote *url.URL) func(*http.Request) { default: req.URL.Path = path.Join("/openai/deployments", deployment, strings.TrimPrefix(req.URL.Path, "/v1/")) } + + // Only add api-version for non-serverless deployments + query := req.URL.Query() + query.Add("api-version", AzureOpenAIAPIVersion) + req.URL.RawQuery = query.Encode() } req.URL.RawPath = req.URL.EscapedPath() @@ -271,13 +272,6 @@ func makeDirector(remote *url.URL) func(*http.Request) { req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) } - // Only add api-version for non-serverless deployments - if !isServerless { - query := req.URL.Query() - query.Add("api-version", 
AzureOpenAIAPIVersion) - req.URL.RawQuery = query.Encode() - } - log.Printf("Proxying request [%s] %s -> %s", model, originURL, req.URL.String()) } } @@ -289,6 +283,7 @@ func modifyResponse(res *http.Response) error { res.Body = io.NopCloser(bytes.NewBuffer(body)) } + // Handle streaming responses if res.Header.Get("Content-Type") == "text/event-stream" { res.Header.Set("X-Accel-Buffering", "no") } From 1aa7eaebe94d5e63fbe5044fa465628768c2519e Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 18:41:35 +0200 Subject: [PATCH 20/33] remove cargo --- .github/dependabot.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index bd4568a..6673b46 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -13,7 +13,3 @@ updates: directory: "/" # Location of package manifests schedule: interval: "weekly" - - package-ecosystem: "cargo" # See documentation for possible values - directory: "/" # Location of package manifests - schedule: - interval: "weekly" From 3651c8f11004c06a2309088c04636c83947f3fed Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 18:59:33 +0200 Subject: [PATCH 21/33] azoa --- pkg/azure/proxy.go | 51 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index d549ae7..c712d67 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -105,11 +105,46 @@ func init() { log.Printf("Loaded %d serverless deployment infos", len(ServerlessDeploymentInfo)) } -func HandleToken(req *http.Request) { - deployment := extractDeploymentFromPath(req.URL.Path) +func proxyRequest(w http.ResponseWriter, r *http.Request) { + client := &http.Client{} + body, err := ioutil.ReadAll(r.Body) + if err != nil { + http.Error(w, "Failed to read request body", http.StatusInternalServerError) + return + } + + req, err := http.NewRequest("POST", "https://Mistral-large2.swedencentral.models.ai.azure.com/v1/chat/completions", strings.NewReader(string(body))) + if err != nil { + http.Error(w, "Failed to create request", http.StatusInternalServerError) + return + } + + // Forward headers from the original request + for name, values := range r.Header { + for _, value := range values { + req.Header.Add(name, value) + } + } + + handleToken(req, r.URL.Path) + + resp, err := client.Do(req) + if err != nil { + http.Error(w, "Failed to make request", http.StatusInternalServerError) + return + } + defer resp.Body.Close() + + respBody, err := ioutil.ReadAll(resp.Body) + if err != nil { + http.Error(w, "Failed to read response body", http.StatusInternalServerError) + return + } + + w.WriteHeader(resp.Sta// Removed duplicate function declarationctDeploymentFromPath(path) if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { - req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) + req.Header.Set("Authorization", "Bearer "+info.Key) req.Header.Del("api-key") log.Printf("Using serverless deployment authentication for %s", deployment) return @@ -136,6 +171,16 @@ func HandleToken(req *http.Request) { } } +func extractDeploymentFromPath(path string) string { + parts := strings.Split(path, "/") + for i, part := range parts { + if part == "deployments" && i+1 < len(parts) { + return parts[i+1] + } + } + return "" +} + func handleModelMapper() { overrideMode := 
strings.ToLower(os.Getenv("AZURE_OPENAI_MODEL_MAPPER_MODE")) == "override" From 0b2f7293165130bb7643433b9de1b75d9a115391 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 19:21:23 +0200 Subject: [PATCH 22/33] Refactor proxyRequest function to handle response writing and error handling --- pkg/azure/proxy.go | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index c712d67..d814052 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "io" + "io/ioutil" "log" "net/http" "net/http/httputil" @@ -141,7 +142,15 @@ func proxyRequest(w http.ResponseWriter, r *http.Request) { return } - w.WriteHeader(resp.Sta// Removed duplicate function declarationctDeploymentFromPath(path) + w.WriteHeader(resp.StatusCode) + _, err = w.Write(respBody) + if err != nil { + http.Error(w, "Failed to write response", http.StatusInternalServerError) + } +} + +func handleToken(req *http.Request, path string) { + deployment := extractDeploymentFromPath(path) if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { req.Header.Set("Authorization", "Bearer "+info.Key) @@ -240,22 +249,12 @@ func sanitizeHeaders(headers http.Header) http.Header { return sanitized } -func extractDeploymentFromPath(path string) string { - parts := strings.Split(path, "/") - for i, part := range parts { - if part == "deployments" && i+1 < len(parts) { - return parts[i+1] - } - } - return "" -} - func makeDirector(remote *url.URL) func(*http.Request) { return func(req *http.Request) { model := getModelFromRequest(req) deployment := GetDeploymentByModel(model) - HandleToken(req) + handleToken(req, req.URL.Path) originURL := req.URL.String() From 93f4b9fdcda99a42a1f240b451556bccc7ce5083 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 19:27:30 +0200 Subject: [PATCH 23/33] fix: azure proxy serverless api auth --- pkg/azure/proxy.go | 75 ++++++++++------------------------------------ 1 file changed, 15 insertions(+), 60 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index d814052..cd2caf5 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -5,7 +5,6 @@ import ( "encoding/json" "fmt" "io" - "io/ioutil" "log" "net/http" "net/http/httputil" @@ -106,54 +105,11 @@ func init() { log.Printf("Loaded %d serverless deployment infos", len(ServerlessDeploymentInfo)) } -func proxyRequest(w http.ResponseWriter, r *http.Request) { - client := &http.Client{} - body, err := ioutil.ReadAll(r.Body) - if err != nil { - http.Error(w, "Failed to read request body", http.StatusInternalServerError) - return - } - - req, err := http.NewRequest("POST", "https://Mistral-large2.swedencentral.models.ai.azure.com/v1/chat/completions", strings.NewReader(string(body))) - if err != nil { - http.Error(w, "Failed to create request", http.StatusInternalServerError) - return - } - - // Forward headers from the original request - for name, values := range r.Header { - for _, value := range values { - req.Header.Add(name, value) - } - } - - handleToken(req, r.URL.Path) - - resp, err := client.Do(req) - if err != nil { - http.Error(w, "Failed to make request", http.StatusInternalServerError) - return - } - defer resp.Body.Close() - - respBody, err := ioutil.ReadAll(resp.Body) - if err != nil { - http.Error(w, "Failed to read response body", http.StatusInternalServerError) - return - 
} - - w.WriteHeader(resp.StatusCode) - _, err = w.Write(respBody) - if err != nil { - http.Error(w, "Failed to write response", http.StatusInternalServerError) - } -} - -func handleToken(req *http.Request, path string) { - deployment := extractDeploymentFromPath(path) +func HandleToken(req *http.Request) { + deployment := extractDeploymentFromPath(req.URL.Path) if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { - req.Header.Set("Authorization", "Bearer "+info.Key) + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) req.Header.Del("api-key") log.Printf("Using serverless deployment authentication for %s", deployment) return @@ -180,16 +136,6 @@ func handleToken(req *http.Request, path string) { } } -func extractDeploymentFromPath(path string) string { - parts := strings.Split(path, "/") - for i, part := range parts { - if part == "deployments" && i+1 < len(parts) { - return parts[i+1] - } - } - return "" -} - func handleModelMapper() { overrideMode := strings.ToLower(os.Getenv("AZURE_OPENAI_MODEL_MAPPER_MODE")) == "override" @@ -249,12 +195,22 @@ func sanitizeHeaders(headers http.Header) http.Header { return sanitized } +func extractDeploymentFromPath(path string) string { + parts := strings.Split(path, "/") + for i, part := range parts { + if part == "deployments" && i+1 < len(parts) { + return parts[i+1] + } + } + return "" +} + func makeDirector(remote *url.URL) func(*http.Request) { return func(req *http.Request) { model := getModelFromRequest(req) deployment := GetDeploymentByModel(model) - handleToken(req, req.URL.Path) + HandleToken(req) originURL := req.URL.String() @@ -262,7 +218,7 @@ func makeDirector(remote *url.URL) func(*http.Request) { req.URL.Scheme = "https" req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", info.Name, info.Region) req.Host = req.URL.Host - // For serverless, keep the original path with '/v1' prefix + // For serverless, keep the original path log.Printf("Using serverless deployment for %s", deployment) } else { req.Host = remote.Host @@ -327,7 +283,6 @@ func modifyResponse(res *http.Response) error { res.Body = io.NopCloser(bytes.NewBuffer(body)) } - // Handle streaming responses if res.Header.Get("Content-Type") == "text/event-stream" { res.Header.Set("X-Accel-Buffering", "no") } From 98cff09591f5369dc90d670fdf6e28dd5d0e36ae Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 19:40:26 +0200 Subject: [PATCH 24/33] chore: Refactor HandleToken function to handle serverless deployments and improve token handling logic by removing strings.tolower from pre forked code. 
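
A minimal sketch of the lookup order this change establishes (exact match
first, then a case-insensitive scan), written as a hypothetical standalone
helper. resolveServerlessKey is illustrative rather than part of the diff; it
assumes the package-level ServerlessDeploymentInfo map and the strings import
already present in this file:

    // resolveServerlessKey resolves a deployment name to its serverless API
    // key: exact map lookup first, case-insensitive scan as a fallback.
    func resolveServerlessKey(deployment string) (string, bool) {
        if info, ok := ServerlessDeploymentInfo[deployment]; ok {
            return info.Key, true
        }
        for name, info := range ServerlessDeploymentInfo {
            if strings.EqualFold(name, deployment) {
                return info.Key, true
            }
        }
        return "", false
    }

The exact match is checked first so a correctly cased deployment name never
pays for the linear scan.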
--- pkg/azure/proxy.go | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index cd2caf5..e0d2716 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -108,15 +108,33 @@ func init() { func HandleToken(req *http.Request) { deployment := extractDeploymentFromPath(req.URL.Path) - if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { - req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) - req.Header.Del("api-key") - log.Printf("Using serverless deployment authentication for %s", deployment) + // First, try an exact match + if info, ok := ServerlessDeploymentInfo[deployment]; ok { + setServerlessAuth(req, info, deployment) return } - var token string + // If no exact match, try case-insensitive match + for key, info := range ServerlessDeploymentInfo { + if strings.EqualFold(key, deployment) { + setServerlessAuth(req, info, deployment) + return + } + } + + // If no serverless match, proceed with regular Azure OpenAI authentication + handleRegularAuth(req, deployment) +} + +func setServerlessAuth(req *http.Request, info ServerlessDeployment, deployment string) { + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) + req.Header.Del("api-key") + log.Printf("Using serverless deployment authentication for %s", deployment) +} +func handleRegularAuth(req *http.Request, deployment string) { + // Existing code for regular Azure OpenAI authentication + var token string if apiKey := req.Header.Get("api-key"); apiKey != "" { token = apiKey } else if authHeader := req.Header.Get("Authorization"); authHeader != "" { From 3e7adf17e9208f09d0c29b16eae930d73f428431 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 20:09:39 +0200 Subject: [PATCH 25/33] fkry --- pkg/azure/proxy.go | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index e0d2716..d8da456 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -50,8 +50,8 @@ var ( "text-embedding-3-large": "text-embedding-3-large-1", } AzureAIStudioDeployments = make(map[string]string) - ServerlessDeploymentInfo = make(map[string]ServerlessDeployment) fallbackModelMapper = regexp.MustCompile(`[.:]`) + ServerlessDeploymentInfo = make(map[string]ServerlessDeployment) ) type ServerlessDeployment struct { @@ -76,18 +76,16 @@ func init() { if len(info) == 2 { deploymentInfo := strings.Split(info[1], ":") if len(deploymentInfo) == 2 { - AzureAIStudioDeployments[info[0]] = deploymentInfo[0] - ServerlessDeploymentInfo[strings.ToLower(info[0])] = ServerlessDeployment{ + ServerlessDeploymentInfo[info[0]] = ServerlessDeployment{ Name: deploymentInfo[0], Region: deploymentInfo[1], Key: os.Getenv("AZURE_OPENAI_KEY_" + strings.ToUpper(info[0])), } } - } else { - log.Printf("error parsing AZURE_AI_STUDIO_DEPLOYMENTS, invalid value %s", pair) } } } + log.Printf("Loaded ServerlessDeploymentInfo: %+v", ServerlessDeploymentInfo) if v := os.Getenv("AZURE_OPENAI_TOKEN"); v != "" { AzureOpenAIToken = v @@ -105,35 +103,34 @@ func init() { log.Printf("Loaded %d serverless deployment infos", len(ServerlessDeploymentInfo)) } -func HandleToken(req *http.Request) { +func HandleToken(req *http.Request) string { deployment := extractDeploymentFromPath(req.URL.Path) // First, try an exact match if info, ok := ServerlessDeploymentInfo[deployment]; ok { - setServerlessAuth(req, info, 
deployment) - return + return setServerlessAuth(req, info, deployment) } // If no exact match, try case-insensitive match for key, info := range ServerlessDeploymentInfo { if strings.EqualFold(key, deployment) { - setServerlessAuth(req, info, deployment) - return + return setServerlessAuth(req, info, deployment) } } // If no serverless match, proceed with regular Azure OpenAI authentication - handleRegularAuth(req, deployment) + return handleRegularAuth(req, deployment) } -func setServerlessAuth(req *http.Request, info ServerlessDeployment, deployment string) { - req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) +func setServerlessAuth(req *http.Request, info ServerlessDeployment, deployment string) string { + token := fmt.Sprintf("Bearer %s", info.Key) + req.Header.Set("Authorization", token) req.Header.Del("api-key") log.Printf("Using serverless deployment authentication for %s", deployment) + return deployment // Return the actual deployment name } -func handleRegularAuth(req *http.Request, deployment string) { - // Existing code for regular Azure OpenAI authentication +func handleRegularAuth(req *http.Request, deployment string) string { var token string if apiKey := req.Header.Get("api-key"); apiKey != "" { token = apiKey @@ -152,6 +149,7 @@ func handleRegularAuth(req *http.Request, deployment string) { } else { log.Printf("Warning: No authentication token found for deployment: %s", deployment) } + return deployment } func handleModelMapper() { @@ -226,13 +224,11 @@ func extractDeploymentFromPath(path string) string { func makeDirector(remote *url.URL) func(*http.Request) { return func(req *http.Request) { model := getModelFromRequest(req) - deployment := GetDeploymentByModel(model) - - HandleToken(req) + deployment := HandleToken(req) // This now returns the actual deployment name originURL := req.URL.String() - if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { + if info, ok := ServerlessDeploymentInfo[deployment]; ok { req.URL.Scheme = "https" req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", info.Name, info.Region) req.Host = req.URL.Host @@ -290,7 +286,8 @@ func makeDirector(remote *url.URL) func(*http.Request) { req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) } - log.Printf("Proxying request [%s] %s -> %s", model, originURL, req.URL.String()) + log.Printf("Final request URL: %s", req.URL.String()) + log.Printf("Final request headers: %v", req.Header) } } From b2b726eb65ebdb495b9193d1c1e26fa30a0852d7 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 20:14:12 +0200 Subject: [PATCH 26/33] Refactor proxyRequest function to handle response writing and error handling --- pkg/azure/proxy.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index d8da456..78a2b83 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -227,6 +227,7 @@ func makeDirector(remote *url.URL) func(*http.Request) { deployment := HandleToken(req) // This now returns the actual deployment name originURL := req.URL.String() + log.Printf("Original request URL: %s for model: %s", originURL, model) if info, ok := ServerlessDeploymentInfo[deployment]; ok { req.URL.Scheme = "https" @@ -286,8 +287,8 @@ func makeDirector(remote *url.URL) func(*http.Request) { req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) } - log.Printf("Final request URL: %s", req.URL.String()) - log.Printf("Final request headers: %v", req.Header) + 
log.Printf("Proxying request [%s] %s -> %s", model, originURL, req.URL.String()) + log.Printf("Final request headers: %v", sanitizeHeaders(req.Header)) } } From cc51b447d8cdf079df4ca4398eb0a1ba5ec6d796 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 20:26:18 +0200 Subject: [PATCH 27/33] fkery --- pkg/azure/proxy.go | 69 ++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 78a2b83..4c0639d 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -103,25 +103,6 @@ func init() { log.Printf("Loaded %d serverless deployment infos", len(ServerlessDeploymentInfo)) } -func HandleToken(req *http.Request) string { - deployment := extractDeploymentFromPath(req.URL.Path) - - // First, try an exact match - if info, ok := ServerlessDeploymentInfo[deployment]; ok { - return setServerlessAuth(req, info, deployment) - } - - // If no exact match, try case-insensitive match - for key, info := range ServerlessDeploymentInfo { - if strings.EqualFold(key, deployment) { - return setServerlessAuth(req, info, deployment) - } - } - - // If no serverless match, proceed with regular Azure OpenAI authentication - return handleRegularAuth(req, deployment) -} - func setServerlessAuth(req *http.Request, info ServerlessDeployment, deployment string) string { token := fmt.Sprintf("Bearer %s", info.Key) req.Header.Set("Authorization", token) @@ -221,10 +202,43 @@ func extractDeploymentFromPath(path string) string { return "" } +func HandleToken(req *http.Request) string { + deployment := extractDeploymentFromPath(req.URL.Path) + + // First, try an exact match for serverless deployment + if info, ok := ServerlessDeploymentInfo[deployment]; ok { + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) + req.Header.Del("api-key") + log.Printf("Using serverless deployment authentication for %s", deployment) + return deployment + } + + // If no serverless match, proceed with regular Azure OpenAI authentication + var token string + if apiKey := req.Header.Get("api-key"); apiKey != "" { + token = apiKey + } else if authHeader := req.Header.Get("Authorization"); authHeader != "" { + token = strings.TrimPrefix(authHeader, "Bearer ") + } else if AzureOpenAIToken != "" { + token = AzureOpenAIToken + } else if envApiKey := os.Getenv("AZURE_OPENAI_API_KEY"); envApiKey != "" { + token = envApiKey + } + + if token != "" { + req.Header.Set("api-key", token) + req.Header.Del("Authorization") + log.Printf("Using regular Azure OpenAI authentication for %s", deployment) + } else { + log.Printf("Warning: No authentication token found for deployment: %s", deployment) + } + return deployment +} + func makeDirector(remote *url.URL) func(*http.Request) { return func(req *http.Request) { model := getModelFromRequest(req) - deployment := HandleToken(req) // This now returns the actual deployment name + deployment := HandleToken(req) originURL := req.URL.String() log.Printf("Original request URL: %s for model: %s", originURL, model) @@ -236,9 +250,9 @@ func makeDirector(remote *url.URL) func(*http.Request) { // For serverless, keep the original path log.Printf("Using serverless deployment for %s", deployment) } else { - req.Host = remote.Host req.URL.Scheme = remote.Scheme req.URL.Host = remote.Host + req.Host = remote.Host // For regular Azure OpenAI, construct the path switch { @@ -248,18 +262,7 @@ func makeDirector(remote *url.URL) func(*http.Request) { 
req.URL.Path = path.Join("/openai/deployments", deployment, "completions") case strings.HasPrefix(req.URL.Path, "/v1/embeddings"): req.URL.Path = path.Join("/openai/deployments", deployment, "embeddings") - case strings.HasPrefix(req.URL.Path, "/v1/images/generations"): - req.URL.Path = path.Join("/openai/deployments", deployment, "images/generations") - case strings.HasPrefix(req.URL.Path, "/v1/fine_tunes"): - req.URL.Path = path.Join("/openai/deployments", deployment, "fine-tunes") - case strings.HasPrefix(req.URL.Path, "/v1/files"): - req.URL.Path = path.Join("/openai/deployments", deployment, "files") - case strings.HasPrefix(req.URL.Path, "/v1/audio/speech"): - req.URL.Path = path.Join("/openai/deployments", deployment, "audio/speech") - case strings.HasPrefix(req.URL.Path, "/v1/audio/transcriptions"): - req.URL.Path = path.Join("/openai/deployments", deployment, "transcriptions") - case strings.HasPrefix(req.URL.Path, "/v1/audio/translations"): - req.URL.Path = path.Join("/openai/deployments", deployment, "translations") + // ... (keep other cases) default: req.URL.Path = path.Join("/openai/deployments", deployment, strings.TrimPrefix(req.URL.Path, "/v1/")) } From 8122f0a8feb56c1c3ac89115d6d3a2c394c9da81 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 20:42:55 +0200 Subject: [PATCH 28/33] fuck model mapper temp --- pkg/azure/proxy.go | 284 +++++++++++---------------------------------- 1 file changed, 66 insertions(+), 218 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 4c0639d..b10ec3d 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -2,7 +2,6 @@ package azure import ( "bytes" - "encoding/json" "fmt" "io" "log" @@ -11,46 +10,14 @@ import ( "net/url" "os" "path" - "regexp" "strings" "github.com/tidwall/gjson" ) var ( - AzureOpenAIToken = "" - AzureOpenAIAPIVersion = "2024-06-01" - AzureOpenAIEndpoint = "" - AzureOpenAIModelMapper = map[string]string{ - "gpt-3.5-turbo": "gpt-35-turbo", - "gpt-3.5-turbo-0125": "gpt-35-turbo-0125", - "gpt-3.5-turbo-0613": "gpt-35-turbo-0613", - "gpt-3.5-turbo-1106": "gpt-35-turbo-1106", - "gpt-3.5-turbo-16k-0613": "gpt-35-turbo-16k-0613", - "gpt-3.5-turbo-instruct-0914": "gpt-35-turbo-instruct-0914", - "gpt-4": "gpt-4-0613", - "gpt-4-32k": "gpt-4-32k", - "gpt-4-32k-0613": "gpt-4-32k-0613", - "gpt-4o": "gpt-4o", - "gpt-4o-mini": "gpt-4o-mini", - "gpt-4o-2024-05-13": "gpt-4o-2024-05-13", - "gpt-4-turbo": "gpt-4-turbo", - "gpt-4-vision-preview": "gpt-4-vision-preview", - "gpt-4-turbo-2024-04-09": "gpt-4-turbo-2024-04-09", - "gpt-4-1106-preview": "gpt-4-1106-preview", - "text-embedding-ada-002": "text-embedding-ada-002", - "dall-e-2": "dall-e-2", - "dall-e-3": "dall-e-3", - "babbage-002": "babbage-002", - "davinci-002": "davinci-002", - "whisper-1": "whisper", - "tts-1": "tts", - "tts-1-hd": "tts-hd", - "text-embedding-3-small": "text-embedding-3-small-1", - "text-embedding-3-large": "text-embedding-3-large-1", - } - AzureAIStudioDeployments = make(map[string]string) - fallbackModelMapper = regexp.MustCompile(`[.:]`) + AzureOpenAIAPIVersion = "2024-06-01" + AzureOpenAIEndpoint = "" ServerlessDeploymentInfo = make(map[string]ServerlessDeployment) ) @@ -68,8 +35,6 @@ func init() { AzureOpenAIEndpoint = v } - handleModelMapper() - if v := os.Getenv("AZURE_AI_STUDIO_DEPLOYMENTS"); v != "" { for _, pair := range strings.Split(v, ",") { info := strings.Split(pair, "=") @@ -86,89 +51,82 @@ func init() { } } log.Printf("Loaded ServerlessDeploymentInfo: %+v", 
ServerlessDeploymentInfo) + log.Printf("Azure OpenAI Endpoint: %s", AzureOpenAIEndpoint) + log.Printf("Azure OpenAI API Version: %s", AzureOpenAIAPIVersion) +} - if v := os.Getenv("AZURE_OPENAI_TOKEN"); v != "" { - AzureOpenAIToken = v - log.Printf("loading azure api token from env") - } - - log.Printf("loading azure api endpoint: %s", AzureOpenAIEndpoint) - log.Printf("loading azure api version: %s", AzureOpenAIAPIVersion) - for k, v := range AzureOpenAIModelMapper { - log.Printf("final azure model mapper: %s -> %s", k, v) - } - for k, v := range AzureAIStudioDeployments { - log.Printf("loading azure ai studio deployment: %s -> %s", k, v) +func NewOpenAIReverseProxy() *httputil.ReverseProxy { + return &httputil.ReverseProxy{ + Director: makeDirector(), + ModifyResponse: modifyResponse, } - log.Printf("Loaded %d serverless deployment infos", len(ServerlessDeploymentInfo)) } -func setServerlessAuth(req *http.Request, info ServerlessDeployment, deployment string) string { - token := fmt.Sprintf("Bearer %s", info.Key) - req.Header.Set("Authorization", token) - req.Header.Del("api-key") - log.Printf("Using serverless deployment authentication for %s", deployment) - return deployment // Return the actual deployment name -} +func makeDirector() func(*http.Request) { + return func(req *http.Request) { + model := getModelFromRequest(req) + originURL := req.URL.String() + log.Printf("Original request URL: %s for model: %s", originURL, model) -func handleRegularAuth(req *http.Request, deployment string) string { - var token string - if apiKey := req.Header.Get("api-key"); apiKey != "" { - token = apiKey - } else if authHeader := req.Header.Get("Authorization"); authHeader != "" { - token = strings.TrimPrefix(authHeader, "Bearer ") - } else if AzureOpenAIToken != "" { - token = AzureOpenAIToken - } else if envApiKey := os.Getenv("AZURE_OPENAI_API_KEY"); envApiKey != "" { - token = envApiKey - } + // Check if it's a serverless deployment + if info, ok := ServerlessDeploymentInfo[model]; ok { + handleServerlessRequest(req, info, model) + } else { + handleRegularRequest(req, model) + } - if token != "" { - req.Header.Set("api-key", token) - req.Header.Del("Authorization") - log.Printf("Using regular Azure OpenAI authentication for %s", deployment) - } else { - log.Printf("Warning: No authentication token found for deployment: %s", deployment) + log.Printf("Proxying request [%s] %s -> %s", model, originURL, req.URL.String()) + log.Printf("Final request headers: %v", sanitizeHeaders(req.Header)) } - return deployment } -func handleModelMapper() { - overrideMode := strings.ToLower(os.Getenv("AZURE_OPENAI_MODEL_MAPPER_MODE")) == "override" +func handleServerlessRequest(req *http.Request, info ServerlessDeployment, model string) { + req.URL.Scheme = "https" + req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", info.Name, info.Region) + req.Host = req.URL.Host - if v := os.Getenv("AZURE_OPENAI_MODEL_MAPPER"); v != "" { - for _, pair := range strings.Split(v, ",") { - info := strings.Split(pair, "=") - if len(info) == 2 { - if overrideMode { - AzureOpenAIModelMapper[info[0]] = info[1] - log.Printf("Overriding model mapping: %s -> %s", info[0], info[1]) - } else { - if _, exists := AzureOpenAIModelMapper[info[0]]; !exists { - AzureOpenAIModelMapper[info[0]] = info[1] - log.Printf("Adding new model mapping: %s -> %s", info[0], info[1]) - } else { - log.Printf("Skipping existing model mapping: %s", info[0]) - } - } - } else { - log.Printf("error parsing AZURE_OPENAI_MODEL_MAPPER, invalid value %s", pair) - } 
- } - } -} + // Keep the original path for serverless deployments + // req.URL.Path remains unchanged -func NewOpenAIReverseProxy() *httputil.ReverseProxy { - remote, err := url.Parse(AzureOpenAIEndpoint) - if err != nil { - log.Printf("error parse endpoint: %s\n", AzureOpenAIEndpoint) - os.Exit(1) - } + // Set the correct authorization header for serverless + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) + req.Header.Del("api-key") - return &httputil.ReverseProxy{ - Director: makeDirector(remote), - ModifyResponse: modifyResponse, - } + log.Printf("Using serverless deployment for %s", model) +} + +func handleRegularRequest(req *http.Request, model string) { + remote, _ := url.Parse(AzureOpenAIEndpoint) + req.URL.Scheme = remote.Scheme + req.URL.Host = remote.Host + req.Host = remote.Host + + // Construct the path for regular Azure OpenAI deployments + deployment := model // Use the model as the deployment name for regular deployments + switch { + case strings.HasPrefix(req.URL.Path, "/v1/chat/completions"): + req.URL.Path = path.Join("/openai/deployments", deployment, "chat/completions") + case strings.HasPrefix(req.URL.Path, "/v1/completions"): + req.URL.Path = path.Join("/openai/deployments", deployment, "completions") + case strings.HasPrefix(req.URL.Path, "/v1/embeddings"): + req.URL.Path = path.Join("/openai/deployments", deployment, "embeddings") + // Add other cases as needed + default: + req.URL.Path = path.Join("/openai/deployments", deployment, strings.TrimPrefix(req.URL.Path, "/v1/")) + } + + // Add api-version query parameter + query := req.URL.Query() + query.Add("api-version", AzureOpenAIAPIVersion) + req.URL.RawQuery = query.Encode() + + // Use the api-key from the original request for regular deployments + apiKey := req.Header.Get("api-key") + if apiKey == "" { + log.Printf("Warning: No api-key found for regular deployment: %s", model) + } + + log.Printf("Using regular Azure OpenAI deployment for %s", model) } func getModelFromRequest(req *http.Request) string { @@ -192,109 +150,6 @@ func sanitizeHeaders(headers http.Header) http.Header { return sanitized } -func extractDeploymentFromPath(path string) string { - parts := strings.Split(path, "/") - for i, part := range parts { - if part == "deployments" && i+1 < len(parts) { - return parts[i+1] - } - } - return "" -} - -func HandleToken(req *http.Request) string { - deployment := extractDeploymentFromPath(req.URL.Path) - - // First, try an exact match for serverless deployment - if info, ok := ServerlessDeploymentInfo[deployment]; ok { - req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) - req.Header.Del("api-key") - log.Printf("Using serverless deployment authentication for %s", deployment) - return deployment - } - - // If no serverless match, proceed with regular Azure OpenAI authentication - var token string - if apiKey := req.Header.Get("api-key"); apiKey != "" { - token = apiKey - } else if authHeader := req.Header.Get("Authorization"); authHeader != "" { - token = strings.TrimPrefix(authHeader, "Bearer ") - } else if AzureOpenAIToken != "" { - token = AzureOpenAIToken - } else if envApiKey := os.Getenv("AZURE_OPENAI_API_KEY"); envApiKey != "" { - token = envApiKey - } - - if token != "" { - req.Header.Set("api-key", token) - req.Header.Del("Authorization") - log.Printf("Using regular Azure OpenAI authentication for %s", deployment) - } else { - log.Printf("Warning: No authentication token found for deployment: %s", deployment) - } - return deployment -} - -func 
makeDirector(remote *url.URL) func(*http.Request) { - return func(req *http.Request) { - model := getModelFromRequest(req) - deployment := HandleToken(req) - - originURL := req.URL.String() - log.Printf("Original request URL: %s for model: %s", originURL, model) - - if info, ok := ServerlessDeploymentInfo[deployment]; ok { - req.URL.Scheme = "https" - req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", info.Name, info.Region) - req.Host = req.URL.Host - // For serverless, keep the original path - log.Printf("Using serverless deployment for %s", deployment) - } else { - req.URL.Scheme = remote.Scheme - req.URL.Host = remote.Host - req.Host = remote.Host - - // For regular Azure OpenAI, construct the path - switch { - case strings.HasPrefix(req.URL.Path, "/v1/chat/completions"): - req.URL.Path = path.Join("/openai/deployments", deployment, "chat/completions") - case strings.HasPrefix(req.URL.Path, "/v1/completions"): - req.URL.Path = path.Join("/openai/deployments", deployment, "completions") - case strings.HasPrefix(req.URL.Path, "/v1/embeddings"): - req.URL.Path = path.Join("/openai/deployments", deployment, "embeddings") - // ... (keep other cases) - default: - req.URL.Path = path.Join("/openai/deployments", deployment, strings.TrimPrefix(req.URL.Path, "/v1/")) - } - - // Only add api-version for non-serverless deployments - query := req.URL.Query() - query.Add("api-version", AzureOpenAIAPIVersion) - req.URL.RawQuery = query.Encode() - } - - req.URL.RawPath = req.URL.EscapedPath() - - if req.Body != nil { - var requestBody map[string]interface{} - bodyBytes, _ := io.ReadAll(req.Body) - json.Unmarshal(bodyBytes, &requestBody) - - newParams := []string{"completion_config", "presence_penalty", "frequency_penalty", "best_of"} - for _, param := range newParams { - if val, ok := requestBody[param]; ok { - log.Printf("Request includes %s parameter: %v", param, val) - } - } - - req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) - } - - log.Printf("Proxying request [%s] %s -> %s", model, originURL, req.URL.String()) - log.Printf("Final request headers: %v", sanitizeHeaders(req.Header)) - } -} - func modifyResponse(res *http.Response) error { if res.StatusCode >= 400 { body, _ := io.ReadAll(res.Body) @@ -308,10 +163,3 @@ func modifyResponse(res *http.Response) error { return nil } - -func GetDeploymentByModel(model string) string { - if v, ok := AzureOpenAIModelMapper[model]; ok { - return v - } - return fallbackModelMapper.ReplaceAllString(model, "") -} From cb00f3af43b059b1d87e6158cfb14ad4a1df0d56 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 20:45:47 +0200 Subject: [PATCH 29/33] f u c k e r y --- pkg/azure/proxy.go | 82 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 69 insertions(+), 13 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index b10ec3d..96ed7c2 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -19,6 +19,7 @@ var ( AzureOpenAIAPIVersion = "2024-06-01" AzureOpenAIEndpoint = "" ServerlessDeploymentInfo = make(map[string]ServerlessDeployment) + AzureOpenAIModelMapper = make(map[string]string) ) type ServerlessDeployment struct { @@ -41,7 +42,7 @@ func init() { if len(info) == 2 { deploymentInfo := strings.Split(info[1], ":") if len(deploymentInfo) == 2 { - ServerlessDeploymentInfo[info[0]] = ServerlessDeployment{ + ServerlessDeploymentInfo[strings.ToLower(info[0])] = ServerlessDeployment{ Name: deploymentInfo[0], Region: deploymentInfo[1], Key: 
os.Getenv("AZURE_OPENAI_KEY_" + strings.ToUpper(info[0])), @@ -50,6 +51,37 @@ func init() { } } } + + // Initialize AzureOpenAIModelMapper (you might want to load this from an environment variable or config file) + AzureOpenAIModelMapper = map[string]string{ + "gpt-3.5-turbo": "gpt-35-turbo", + "gpt-3.5-turbo-0125": "gpt-35-turbo-0125", + "gpt-3.5-turbo-0613": "gpt-35-turbo-0613", + "gpt-3.5-turbo-1106": "gpt-35-turbo-1106", + "gpt-3.5-turbo-16k-0613": "gpt-35-turbo-16k-0613", + "gpt-3.5-turbo-instruct-0914": "gpt-35-turbo-instruct-0914", + "gpt-4": "gpt-4-0613", + "gpt-4-32k": "gpt-4-32k", + "gpt-4-32k-0613": "gpt-4-32k-0613", + "gpt-4o": "gpt-4o", + "gpt-4o-mini": "gpt-4o-mini", + "gpt-4o-2024-05-13": "gpt-4o-2024-05-13", + "gpt-4-turbo": "gpt-4-turbo", + "gpt-4-vision-preview": "gpt-4-vision-preview", + "gpt-4-turbo-2024-04-09": "gpt-4-turbo-2024-04-09", + "gpt-4-1106-preview": "gpt-4-1106-preview", + "text-embedding-ada-002": "text-embedding-ada-002", + "dall-e-2": "dall-e-2", + "dall-e-3": "dall-e-3", + "babbage-002": "babbage-002", + "davinci-002": "davinci-002", + "whisper-1": "whisper", + "tts-1": "tts", + "tts-1-hd": "tts-hd", + "text-embedding-3-small": "text-embedding-3-small-1", + "text-embedding-3-large": "text-embedding-3-large-1", + } + log.Printf("Loaded ServerlessDeploymentInfo: %+v", ServerlessDeploymentInfo) log.Printf("Azure OpenAI Endpoint: %s", AzureOpenAIEndpoint) log.Printf("Azure OpenAI API Version: %s", AzureOpenAIAPIVersion) @@ -68,10 +100,16 @@ func makeDirector() func(*http.Request) { originURL := req.URL.String() log.Printf("Original request URL: %s for model: %s", originURL, model) + // Convert model to lowercase for case-insensitive matching + modelLower := strings.ToLower(model) + // Check if it's a serverless deployment - if info, ok := ServerlessDeploymentInfo[model]; ok { + if info, ok := ServerlessDeploymentInfo[modelLower]; ok { handleServerlessRequest(req, info, model) + } else if azureModel, ok := AzureOpenAIModelMapper[modelLower]; ok { + handleRegularRequest(req, azureModel) } else { + log.Printf("Warning: Unknown model %s, treating as regular Azure OpenAI deployment", model) handleRegularRequest(req, model) } @@ -85,8 +123,13 @@ func handleServerlessRequest(req *http.Request, info ServerlessDeployment, model req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", info.Name, info.Region) req.Host = req.URL.Host - // Keep the original path for serverless deployments - // req.URL.Path remains unchanged + // Preserve query parameters from the original request + originalQuery := req.URL.Query() + for key, values := range originalQuery { + for _, value := range values { + req.URL.Query().Add(key, value) + } + } // Set the correct authorization header for serverless req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) @@ -95,14 +138,13 @@ func handleServerlessRequest(req *http.Request, info ServerlessDeployment, model log.Printf("Using serverless deployment for %s", model) } -func handleRegularRequest(req *http.Request, model string) { +func handleRegularRequest(req *http.Request, deployment string) { remote, _ := url.Parse(AzureOpenAIEndpoint) req.URL.Scheme = remote.Scheme req.URL.Host = remote.Host req.Host = remote.Host // Construct the path for regular Azure OpenAI deployments - deployment := model // Use the model as the deployment name for regular deployments switch { case strings.HasPrefix(req.URL.Path, "/v1/chat/completions"): req.URL.Path = path.Join("/openai/deployments", deployment, "chat/completions") @@ -123,19 +165,33 @@ 
func handleRegularRequest(req *http.Request, model string) { // Use the api-key from the original request for regular deployments apiKey := req.Header.Get("api-key") if apiKey == "" { - log.Printf("Warning: No api-key found for regular deployment: %s", model) + log.Printf("Warning: No api-key found for regular deployment: %s", deployment) } - log.Printf("Using regular Azure OpenAI deployment for %s", model) + log.Printf("Using regular Azure OpenAI deployment for %s", deployment) } func getModelFromRequest(req *http.Request) string { - if req.Body == nil { - return "" + // First, try to get the model from the URL path + parts := strings.Split(req.URL.Path, "/") + for i, part := range parts { + if part == "deployments" && i+1 < len(parts) { + return parts[i+1] + } } - body, _ := io.ReadAll(req.Body) - req.Body = io.NopCloser(bytes.NewBuffer(body)) - return gjson.GetBytes(body, "model").String() + + // If not found in the path, try to get it from the request body + if req.Body != nil { + body, _ := io.ReadAll(req.Body) + req.Body = io.NopCloser(bytes.NewBuffer(body)) + model := gjson.GetBytes(body, "model").String() + if model != "" { + return model + } + } + + // If still not found, return an empty string + return "" } func sanitizeHeaders(headers http.Header) http.Header { From 00b971d4c3cb1dc8e6aa6f92180c40972ed55c25 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 20:58:18 +0200 Subject: [PATCH 30/33] FU C K R Y --- pkg/azure/proxy.go | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 96ed7c2..378caeb 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -94,12 +94,44 @@ func NewOpenAIReverseProxy() *httputil.ReverseProxy { } } +func HandleToken(req *http.Request) { + model := getModelFromRequest(req) + modelLower := strings.ToLower(model) + + // Check if it's a serverless deployment + if info, ok := ServerlessDeploymentInfo[modelLower]; ok { + // Set the correct authorization header for serverless + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) + req.Header.Del("api-key") + log.Printf("Using serverless deployment authentication for %s", model) + } else { + // For regular Azure OpenAI deployments, use the api-key + apiKey := req.Header.Get("api-key") + if apiKey == "" { + apiKey = req.Header.Get("Authorization") + if strings.HasPrefix(apiKey, "Bearer ") { + apiKey = strings.TrimPrefix(apiKey, "Bearer ") + } + } + if apiKey == "" { + log.Printf("Warning: No api-key or Authorization header found for deployment: %s", model) + } else { + req.Header.Set("api-key", apiKey) + req.Header.Del("Authorization") + log.Printf("Using regular Azure OpenAI authentication for %s", model) + } + } +} + func makeDirector() func(*http.Request) { return func(req *http.Request) { model := getModelFromRequest(req) originURL := req.URL.String() log.Printf("Original request URL: %s for model: %s", originURL, model) + // Handle the token + HandleToken(req) + // Convert model to lowercase for case-insensitive matching modelLower := strings.ToLower(model) From 679a07efb1a4e6cebb2535c1b4e97cac19d6d072 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 22:50:16 +0200 Subject: [PATCH 31/33] feat: Add Azure OAI proxy service configuration, docker compose example and updated readme --- .github/workflows/compose.yaml | 21 ++++ README.md | 208 ++++++++++++++++++--------------- 2 files 
changed, 133 insertions(+), 96 deletions(-) create mode 100644 .github/workflows/compose.yaml diff --git a/.github/workflows/compose.yaml b/.github/workflows/compose.yaml new file mode 100644 index 0000000..67baa9b --- /dev/null +++ b/.github/workflows/compose.yaml @@ -0,0 +1,21 @@ +services: + azure-oai-proxy: + # env_file: .env + image: 'gyarbij/azure-oai-proxy:latest' + # container_name: azure-oai-proxy + # Alternatively, use GitHub Container Registry: + # image: 'ghcr.io/gyarbij/azure-oai-proxy:latest' + restart: always + environment: + - AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/ + # - AZURE_OPENAI_PROXY_ADDRESS=0.0.0.0:11437 + # - AZURE_OPENAI_PROXY_MODE=azure + # - AZURE_OPENAI_APIVERSION=2024-06-01 + # - AZURE_OPENAI_MODEL_MAPPER=gpt-3.5-turbo=gpt-35-turbo,gpt-4=gpt-4-turbo + # - AZURE_AI_STUDIO_DEPLOYMENTS=mistral-large-2407=Mistral-large2:swedencentral,llama-3.1-405B=Meta-Llama-3-1-405B-Instruct:northcentralus,llama-3.1-70B=Llama-31-70B:swedencentral + # - AZURE_OPENAI_KEY_MISTRAL-LARGE-2407=your-api-key-1 + # - AZURE_OPENAI_KEY_LLAMA-3.1-8B=your-api-key-2 + # - AZURE_OPENAI_KEY_LLAMA-3.1-70B=your-api-key-3 + ports: + - '11437:11437' + # Uncomment the following line to use an .env file: \ No newline at end of file diff --git a/README.md b/README.md index 257dbe4..19b1bac 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,24 @@ # Azure OpenAI Proxy [![Go Report Card](https://goreportcard.com/badge/github.com/Gyarbij/azure-oai-proxy)](https://goreportcard.com/report/github.com/Gyarbij/azure-oai-proxy) -[![License](https://badgen.net/badge/license/MIT/cyan)](https://github.com/gyarbij/azure-oai-proxy/blob/main/LICENSE) -[![Release](https://badgen.net/github/release/gyarbij/azure-oai-proxy/latest)](https://github.com/gyarbij/azure-oai-proxy) -[![Azure](https://badgen.net/badge/icon/Azure?icon=azure&label)](https://github.com/gyarbij/azure-oai-proxy) -[![Azure](https://badgen.net/badge/icon/OpenAI?icon=azure&label)](https://github.com/gyarbij/azure-oai-proxy) -[![Azure](https://badgen.net/badge/icon/docker?icon=docker&label)](https://github.com/gyarbij/azure-oai-proxy) +[![Main v Dev Commits](https://shields.git.vg/github/commits-difference/Gyarbij/azure-oai-proxy?base=main&head=dev)](https://github.com/gyarbij/azure-oai-proxy) +[![Taal](https://shields.git.vg/github/languages/top/Gyarbij/azure-oai-proxy)](https://github.com/gyarbij/azure-oai-proxy) +[![GHCR Build](https://shields.git.vg/github/actions/workflow/status/gyarbij/azure-oai-proxy/ghcr-docker-publish.yml)](https://github.com/gyarbij/azure-oai-proxy) +[![License](https://shields.git.vg/github/license/Gyarbij/azure-oai-proxy?style=for-the-badge&color=blue)](https://github.com/gyarbij/azure-oai-proxy/blob/main/LICENSE) ## Introduction -Azure OAI Proxy is a lightweight, high-performance proxy server that enables seamless integration between Azure OpenAI Services and applications designed for only OpenAI API compatible endpoints. This project bridges the gap for tools and services that are built to work with OpenAI's API structure but need to utilize Azure's OpenAI. +Azure OAI Proxy is a lightweight, high-performance proxy server that enables seamless integration between Azure OpenAI Services and applications designed for OpenAI API only compatible endpoints. This project bridges the gap for tools and services that are built to work with OpenAI's API structure but need to utilize Azure's OpenAI. 
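In practice the translation is a host, path, and auth-header rewrite. As a rough sketch (the endpoint and deployment names below are placeholders, and the api-version shown is the proxy's current default):

```text
POST /v1/chat/completions            body: {"model": "gpt-4o", ...}
  -->  POST https://{your-endpoint}.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-06-01
```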
## Key Features
 
- βœ… **API Compatibility**: Translates requests from OpenAI API format to Azure OpenAI Services format on-the-fly.
- πŸ—ΊοΈ **Model Mapping**: Automatically maps OpenAI model names to the Azure scheme.
-- πŸ”„ **Dynamic Model List**: Fetches available models directly from your Azure OpenAI deployment to have feature parity with normal OpenAI, in projects such as Open WebUI.
+- πŸ”„ **Dynamic Model List**: Fetches available models directly from your Azure OpenAI deployment.
- 🌐 **Support for Multiple Endpoints**: Handles various API endpoints including image, speech, completions, chat completions, embeddings, and more.
- 🚦 **Error Handling**: Provides meaningful error messages and logging for easier debugging.
-- βš™οΈ **Configurable**: Easy to set up with environment variables for Azure OpenAI endpoint and API key.
+- βš™οΈ **Configurable**: Easy to set up with environment variables for Azure AI/Azure OAI endpoint and API keys.
+- πŸ” **Serverless Deployment Support**: Supports Azure AI serverless deployments with custom authentication.
 
 ## Use Cases
 
@@ -45,119 +45,127 @@ Also, I strongly recommend using TLS/SSL for secure communication between the pr
 ## Supported APIs
 
-The latest version of the Azure OpenAI service now supports the following APIs:
+The latest version of the Azure OpenAI service supports the following APIs:
+
+| Path | Status |
+|--------------------------------|--------|
+| /v1/chat/completions | βœ… |
+| /v1/completions | βœ… |
+| /v1/embeddings | βœ… |
+| /v1/images/generations | βœ… |
+| /v1/fine_tunes | βœ… |
+| /v1/files | βœ… |
+| /v1/models | βœ… |
+| /deployments | βœ… |
+| /v1/audio/speech | βœ… |
+| /v1/audio/transcriptions | βœ… |
+| /v1/audio/translations | βœ… |
+| /v1/models/:model_id/capabilities | βœ… |
 
-| Path | Status |
-| --------------------- | ------ |
-| /v1/chat/completions | βœ… |
-| /v1/completions | βœ… |
-| /v1/embeddings | βœ… |
-| /v1/images/generations | βœ… |
-| /v1/fine_tunes | βœ… |
-| /v1/files | βœ… |
-| /v1/models | βœ… |
-| /deployments | βœ… |
-| /v1/audio | βœ… |
-
-> Other APIs not supported by Azure will be returned in a mock format (such as OPTIONS requests initiated by browsers). If you find your project needs additional OpenAI-supported APIs, feel free to submit a PR.
-
-## Getting Started
-
-It's easy to get started with Azure OAI Proxy. You can either deploy it as a reverse proxy or use it as a forward proxy as detailed below. 
However if you're ready to jump right in and start using the proxy, you can use the following Docker command: - -```docker -docker pull gyarbij/azure-oai-proxy:latest +## Configuration -docker run -d -p 11437:11437 --name=azure-oai-proxy \ - --env AZURE_OPENAI_ENDPOINT=https://{YOURENDPOINT}.openai.azure.com \ - gyarbij/azure-oai-proxy:latest +### Environment Variables + +| Parameter | Description | Default Value | Required | +|-------------------------------|----------------------------------------------------------------------------|----------------------|----------| +| AZURE_OPENAI_ENDPOINT | Azure OpenAI Endpoint | | Yes | +| AZURE_OPENAI_PROXY_ADDRESS | Service listening address | 0.0.0.0:11437 | No | +| AZURE_OPENAI_PROXY_MODE | Proxy mode, can be either "azure" or "openai" | azure | No | +| AZURE_OPENAI_APIVERSION | Azure OpenAI API version | 2024-06-01 | No | +| AZURE_OPENAI_MODEL_MAPPER | Comma-separated list of model=deployment pairs | | No | +| AZURE_AI_STUDIO_DEPLOYMENTS | Comma-separated list of serverless deployments | | No | +| AZURE_OPENAI_KEY_* | API keys for serverless deployments (replace * with uppercase model name) | | No | + +## Usage + +### Docker Compose + +Here's an example `docker-compose.yml` file with all possible environment variable options: + +```yaml +services: + azure-oai-proxy: + image: 'gyarbij/azure-oai-proxy:latest' + # container_name: azure-oai-proxy + # Alternatively, use GitHub Container Registry: + # image: 'ghcr.io/gyarbij/azure-oai-proxy:latest' + restart: always + environment: + - AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/ + # - AZURE_OPENAI_PROXY_ADDRESS=0.0.0.0:11437 + # - AZURE_OPENAI_PROXY_MODE=azure + # - AZURE_OPENAI_APIVERSION=2024-06-01 + # - AZURE_OPENAI_MODEL_MAPPER=gpt-3.5-turbo=gpt-35-turbo,gpt-4=gpt-4-turbo + # - AZURE_AI_STUDIO_DEPLOYMENTS=mistral-large-2407=Mistral-large2:swedencentral,llama-3.1-405B=Meta-Llama-3-1-405B-Instruct:northcentralus,llama-3.1-70B=Llama-31-70B:swedencentral + # - AZURE_OPENAI_KEY_MISTRAL-LARGE-2407=your-api-key-1 + # - AZURE_OPENAI_KEY_LLAMA-3.1-8B=your-api-key-2 + # - AZURE_OPENAI_KEY_LLAMA-3.1-70B=your-api-key-3 + ports: + - '11437:11437' + # Uncomment the following line to use an .env file: + # env_file: .env ``` -## Configuration +To use this configuration: -### 1. Used as reverse proxy (i.e. an OpenAI API gateway) +1. Save the above content in a file named `compose.yaml`. +2. Replace the placeholder values (e.g., `your-endpoint`, `your-api-key-1`, etc.) with your actual Azure OpenAI configuration. +3. Run the following command in the same directory as your `compose.yaml` file: -Environment Variables +```sh +docker compose up -d +``` -Here's the updated markdown table including a column for required: +### Using an .env File -| Parameters | Description | Default Value | Required | -| :------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------------------------------------------------------------------- | :------- | -| AZURE_OPENAI_PROXY_ADDRESS | Service listening address | 0.0.0.0:11437 | No | -| AZURE_OPENAI_PROXY_MODE | Proxy mode, can be either "azure" or "openai". 
| azure | No | -| AZURE_OPENAI_ENDPOINT | Azure OpenAI Endpoint, usually looks like https://{YOURDEPLOYMENT}.openai.azure.com. | | Yes | -| AZURE_OPENAI_APIVERSION | Azure OpenAI API version. Default is 2024-05-01-preview. | 2024-05-01-preview | No | -| AZURE_OPENAI_MODEL_MAPPER (Use for custom deployment names) | A comma-separated list of model=deployment pairs. Maps model names to deployment names. For example, `gpt-3.5-turbo=gpt-35-turbo`, `gpt-3.5-turbo-0301=gpt-35-turbo-0301`. If there is no match, the proxy will pass model as deployment name directly (most Azure model names are the same as OpenAI). | "" | No | -| AZURE_OPENAI_TOKEN | Azure OpenAI API Token. If this environment variable is set, the token in the request header will be ignored. | "" | No | +To use an .env file instead of environment variables in the Docker Compose file: -Use in command line +1. Create a file named `.env` in the same directory as your `docker-compose.yml`. +2. Add your environment variables to the `.env` file, one per line: -```shell -curl https://{your-custom-domain}/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer {your azure api key}" \ - -d '{ - "model": "gpt-3.5-turbo", - "messages": [{"role": "user", "content": "Hello!"}] - }' +``` +AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/ +AZURE_OPENAI_APIVERSION=2024-06-01 +AZURE_AI_STUDIO_DEPLOYMENTS=mistral-large-2407=Mistral-large2:swedencentral,llama-3.1-405B=Meta-Llama-3-1-405B-Instruct:northcentralus +AZURE_OPENAI_KEY_MISTRAL-LARGE-2407=your-api-key-1 +AZURE_OPENAI_KEY_LLAMA-3.1-405B=your-api-key-2 ``` -### 2. Used as forward proxy (i.e. an HTTP proxy) - -When accessing Azure OpenAI API through HTTP, it can be used directly as a proxy, but this tool does not have built-in HTTPS support, so you need an HTTPS proxy such as Nginx to support accessing HTTPS version of OpenAI API. - -Assuming that the proxy domain you configured is `https://{your-domain}.com`, you can execute the following commands in the terminal to use the https proxy: - -```shell -export https_proxy=https://{your-domain}.com +3. Uncomment the `env_file: .env` line in your `docker-compose.yml`. +4. Run `docker-compose up -d` to start the container with the environment variables from the .env file. -curl https://api.openai.com/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer {your azure api key}" \ - -d '{ - "model": "gpt-3.5-turbo", - "messages": [{"role": "user", "content": "Hello!"}] - }' -``` +### Running from GitHub Container Registry -Or configure it as an HTTP proxy in other open source Web ChatGPT projects: +To run the Azure OAI Proxy using the image from GitHub Container Registry: -``` -export HTTPS_PROXY=https://{your-domain}.com +```sh +docker run -d -p 11437:11437 \ + -e AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/ \ + -e AZURE_AI_STUDIO_DEPLOYMENTS=mistral-large-2407=Mistral-large2:swedencentral \ + -e AZURE_OPENAI_KEY_MISTRAL-LARGE-2407=your-api-key \ + ghcr.io/gyarbij/azure-oai-proxy:latest ``` -## Deploy +Replace the placeholder values with your actual Azure OpenAI configuration. 
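As a quick smoke test (assuming the port mapping shown above), you can ask the proxy for the model list it builds from your deployment before pointing a client at it:

```sh
curl http://localhost:11437/v1/models \
  -H "Authorization: Bearer your-azure-api-key"
```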
-Docker Normal Deployment +## Usage Examples -```shell -docker pull gyarbij/azure-oai-proxy:latest -docker run -p 11437:11437 --name=azure-oai-proxy \ - --env AZURE_OPENAI_ENDPOINT=https://{YOURENDPOINT}.openai.azure.com/ \ - gyarbij/azure-oai-proxy:latest -``` -Docker with custom deployment names - -```shell -docker pull gyarbij/azure-oai-proxy:latest -docker run -p 11437:11437 --name=azure-oai-proxy \ - --env AZURE_OPENAI_ENDPOINT=https://{YOURENDPOINT}.openai.azure.com/ \ - --env AZURE_OPENAI_MODEL_MAPPER=gpt-3.5-turbo=dev-g35-turbo,gpt-4=gpt-4ooo \ - gyarbij/azure-oai-proxy:latest -``` +### Calling the API -Calling +Once the proxy is running, you can call it using the OpenAI API format: -```shell -curl https://localhost:11437/v1/chat/completions \ +```sh +curl http://localhost:11437/v1/chat/completions \ -H "Content-Type: application/json" \ - -H "Authorization: Bearer {your azure api key}" \ + -H "Authorization: Bearer your-azure-api-key" \ -d '{ "model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "Hello!"}] }' ``` +For serverless deployments, use the model name as defined in your `AZURE_AI_STUDIO_DEPLOYMENTS` configuration. + ## Model Mapping Mechanism (Used for Custom deployment names) These are the default mappings for the most common models, if your Azure OpenAI deployment uses different names, you can set the `AZURE_OPENAI_MODEL_MAPPER` environment variable to define custom mappings.: @@ -173,7 +181,7 @@ These are the default mappings for the most common models, if your Azure OpenAI | `"gpt-4"` | `"gpt-4-0613"` | | `"gpt-4-32k"` | `"gpt-4-32k"` | | `"gpt-4-32k-0613"` | `"gpt-4-32k-0613"` | -| `"gpt-4o-mini"` | `"gpt-4o-mini"` | +| `"gpt-4o-mini"` | `"gpt-4o-mini-2024-07-18"` | | `"gpt-4o"` | `"gpt-4o"` | | `"gpt-4o-2024-05-13"` | `"gpt-4o-2024-05-13"` | | `"gpt-4-turbo"` | `"gpt-4-turbo"` | @@ -198,8 +206,16 @@ For custom fine-tuned models, the model name can be passed directly. For models | gpt-3.5-turbo | gpt-35-turbo-upgrade | | gpt-3.5-turbo-0301 | gpt-35-turbo-0301-fine-tuned | +## Important Notes + +- Always use HTTPS in production environments for secure communication. +- Regularly update the proxy to ensure compatibility with the latest Azure OpenAI API changes. +- Monitor your Azure OpenAI usage and costs, especially when using this proxy in high-traffic scenarios. + ## Recently Updated ++ 2024-07-25 Implemented support for Azure AI Studio deployments with support for Meta LLama 3.1, Mistral-2407 (mistral large 2), and other open models including from Cohere AI. ++ 2024-07-18 Added support for `gpt4o-mini`. + 2024-06-23 Implemented dynamic model fetching for `/v1/models endpoint`, replacing hardcoded model list. + 2024-06-23 Unified token handling mechanism across the application, improving consistency and security. + 2024-06-23 Added support for audio-related endpoints: `/v1/audio/speech`, `/v1/audio/transcriptions`, and `/v1/audio/translations`. @@ -216,11 +232,11 @@ For custom fine-tuned models, the model name can be passed directly. For models ## Contributing -We welcome contributions! Rest TBD. +Contributions are welcome! Please feel free to submit a Pull Request. ## License -MIT License +This project is licensed under the MIT License. 
## Disclaimer

From f183631c34aef34b882c8e06801a7a0b2a40a060 Mon Sep 17 00:00:00 2001
From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com>
Date: Thu, 25 Jul 2024 22:53:11 +0200
Subject: [PATCH 32/33] Updated README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 19b1bac..b102a24 100644
--- a/README.md
+++ b/README.md
@@ -215,7 +215,7 @@ For custom fine-tuned models, the model name can be passed directly. For models
 ## Recently Updated
 
 + 2024-07-25 Implemented support for Azure AI Studio deployments with support for Meta LLama 3.1, Mistral-2407 (mistral large 2), and other open models including from Cohere AI.
-+ 2024-07-18 Added support for `gpt-4o-mini`.
++ 2024-07-18 Added support for `gpt-4o-mini`.
 + 2024-06-23 Implemented dynamic model fetching for `/v1/models endpoint`, replacing hardcoded model list.
 + 2024-06-23 Unified token handling mechanism across the application, improving consistency and security.
 + 2024-06-23 Added support for audio-related endpoints: `/v1/audio/speech`, `/v1/audio/transcriptions`, and `/v1/audio/translations`.

From 0e798a19e1b718afbf6fbe098134b7f36a620da4 Mon Sep 17 00:00:00 2001
From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com>
Date: Thu, 25 Jul 2024 22:58:50 +0200
Subject: [PATCH 33/33] chore: Comment out the request-header log statement in
 makeDirector for production

---
 pkg/azure/proxy.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go
index 378caeb..4d93766 100644
--- a/pkg/azure/proxy.go
+++ b/pkg/azure/proxy.go
@@ -146,7 +146,7 @@ func makeDirector() func(*http.Request) {
 	}
 
 	log.Printf("Proxying request [%s] %s -> %s", model, originURL, req.URL.String())
-	log.Printf("Final request headers: %v", sanitizeHeaders(req.Header))
+	// log.Printf("Final request headers: %v", sanitizeHeaders(req.Header))
 	}
 }
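A side note on this last change: rather than commenting debug logs out before release, the same effect can be had by gating them behind an environment flag, so they can be re-enabled in the field without a rebuild. A minimal sketch for `pkg/azure` (the `AZURE_OPENAI_PROXY_DEBUG` variable is a hypothetical name, not one this patch set defines):

```go
package azure

import (
	"log"
	"os"
)

// debugLogging is resolved once at startup; AZURE_OPENAI_PROXY_DEBUG is a
// hypothetical flag name and is not defined anywhere in this patch set.
var debugLogging = os.Getenv("AZURE_OPENAI_PROXY_DEBUG") == "true"

// debugf forwards to log.Printf only when debug logging is enabled, so a
// call like debugf("Final request headers: %v", sanitizeHeaders(req.Header))
// can stay in the code without spamming production logs.
func debugf(format string, v ...interface{}) {
	if debugLogging {
		log.Printf(format, v...)
	}
}
```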