From 1b94f5061baf5b4aa5f8bdd4741c492241820dff Mon Sep 17 00:00:00 2001 From: Chono N <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 20 Jul 2024 19:45:11 +0200 Subject: [PATCH 01/33] Update .gitignore --- .gitignore | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index bbfd2cb..c622c7e 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,32 @@ # Dependency directories (remove the comment below to include it) # vendor/ -.idea \ No newline at end of file +.idea + +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk From 636f1e3ac699baaedf858071ad5f897cfb53f7fe Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:18:29 +0200 Subject: [PATCH 02/33] - Added support for Azure AI Studio. - Added base for serverless studio deployments. - Added dotenv. - Add sup and override mode. --- Dockerfile | 2 + main.go | 39 ++++++++++++++++ pkg/azure/proxy.go | 64 +++++++++++++++----------- pkg/azure/types.go | 110 +++++++++++++++++++++++++++------------------ 4 files changed, 146 insertions(+), 69 deletions(-) diff --git a/Dockerfile b/Dockerfile index adf39d5..886665e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,11 @@ FROM golang:1.22.5 AS builder WORKDIR /build COPY . . +RUN go get github.com/joho/godotenv RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o azure-oai-proxy . 
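# Note: CGO_ENABLED=0 produces a statically linked binary, which is what lets
# the runtime stage below use a distroless base image (no shell, no libc).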
FROM gcr.io/distroless/base-debian12 COPY --from=builder /build/azure-oai-proxy / +COPY --from=builder /build/.env /.env EXPOSE 11437 ENTRYPOINT ["/azure-oai-proxy"] \ No newline at end of file diff --git a/main.go b/main.go index 28cfe4a..2fbfaed 100644 --- a/main.go +++ b/main.go @@ -7,10 +7,12 @@ import ( "log" "net/http" "os" + "strings" "github.com/gin-gonic/gin" "github.com/gyarbij/azure-oai-proxy/pkg/azure" "github.com/gyarbij/azure-oai-proxy/pkg/openai" + "github.com/joho/godotenv" ) var ( @@ -60,6 +62,43 @@ func init() { log.Printf("loading azure openai proxy mode: %s", ProxyMode) } +func init() { + // Load .env file if it exists + if err := godotenv.Load(); err != nil { + log.Println("No .env file found") + } + + gin.SetMode(gin.ReleaseMode) + if v := os.Getenv("AZURE_OPENAI_PROXY_ADDRESS"); v != "" { + Address = v + } + if v := os.Getenv("AZURE_OPENAI_PROXY_MODE"); v != "" { + ProxyMode = v + } + log.Printf("loading azure openai proxy address: %s", Address) + log.Printf("loading azure openai proxy mode: %s", ProxyMode) + + // Load Azure OpenAI Model Mapper + if v := os.Getenv("AZURE_OPENAI_MODEL_MAPPER"); v != "" { + for _, pair := range strings.Split(v, ",") { + info := strings.Split(pair, "=") + if len(info) == 2 { + azure.AzureOpenAIModelMapper[info[0]] = info[1] + } + } + } + + // Load Azure AI Studio Deployments + if v := os.Getenv("AZURE_AI_STUDIO_DEPLOYMENTS"); v != "" { + for _, pair := range strings.Split(v, ",") { + info := strings.Split(pair, "=") + if len(info) == 2 { + azure.AzureAIStudioDeployments[info[0]] = info[1] + } + } + } +} + func main() { router := gin.Default() if ProxyMode == "azure" { diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 7bcbbae..bcd1be0 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -48,7 +48,8 @@ var ( "text-embedding-3-small": "text-embedding-3-small-1", "text-embedding-3-large": "text-embedding-3-large-1", } - fallbackModelMapper = regexp.MustCompile(`[.:]`) + AzureAIStudioDeployments = make(map[string]string) + fallbackModelMapper = regexp.MustCompile(`[.:]`) ) func init() { @@ -58,14 +59,17 @@ func init() { if v := os.Getenv("AZURE_OPENAI_ENDPOINT"); v != "" { AzureOpenAIEndpoint = v } - if v := os.Getenv("AZURE_OPENAI_MODEL_MAPPER"); v != "" { + + handleModelMapper() + + if v := os.Getenv("AZURE_AI_STUDIO_DEPLOYMENTS"); v != "" { for _, pair := range strings.Split(v, ",") { info := strings.Split(pair, "=") - if len(info) != 2 { - log.Printf("error parsing AZURE_OPENAI_MODEL_MAPPER, invalid value %s", pair) - os.Exit(1) + if len(info) == 2 { + AzureAIStudioDeployments[info[0]] = info[1] + } else { + log.Printf("error parsing AZURE_AI_STUDIO_DEPLOYMENTS, invalid value %s", pair) } - AzureOpenAIModelMapper[info[0]] = info[1] } } if v := os.Getenv("AZURE_OPENAI_TOKEN"); v != "" { @@ -76,7 +80,35 @@ func init() { log.Printf("loading azure api endpoint: %s", AzureOpenAIEndpoint) log.Printf("loading azure api version: %s", AzureOpenAIAPIVersion) for k, v := range AzureOpenAIModelMapper { - log.Printf("loading azure model mapper: %s -> %s", k, v) + log.Printf("final azure model mapper: %s -> %s", k, v) + } + for k, v := range AzureAIStudioDeployments { + log.Printf("loading azure ai studio deployment: %s -> %s", k, v) + } +} + +func handleModelMapper() { + overrideMode := strings.ToLower(os.Getenv("AZURE_OPENAI_MODEL_MAPPER_MODE")) == "override" + + if v := os.Getenv("AZURE_OPENAI_MODEL_MAPPER"); v != "" { + for _, pair := range strings.Split(v, ",") { + info := strings.Split(pair, "=") + if len(info) == 2 { + if 
overrideMode { + AzureOpenAIModelMapper[info[0]] = info[1] + log.Printf("Overriding model mapping: %s -> %s", info[0], info[1]) + } else { + if _, exists := AzureOpenAIModelMapper[info[0]]; !exists { + AzureOpenAIModelMapper[info[0]] = info[1] + log.Printf("Adding new model mapping: %s -> %s", info[0], info[1]) + } else { + log.Printf("Skipping existing model mapping: %s", info[0]) + } + } + } else { + log.Printf("error parsing AZURE_OPENAI_MODEL_MAPPER, invalid value %s", pair) + } + } } } @@ -102,7 +134,6 @@ func getModelFromRequest(req *http.Request) string { return gjson.GetBytes(body, "model").String() } -// sanitizeHeaders returns a copy of the headers with sensitive information redacted func sanitizeHeaders(headers http.Header) http.Header { sanitized := make(http.Header) for key, values := range headers { @@ -118,48 +149,36 @@ func sanitizeHeaders(headers http.Header) http.Header { func HandleToken(req *http.Request) { var token string - // Check for API Key in the api-key header if apiKey := req.Header.Get("api-key"); apiKey != "" { token = apiKey } else if authHeader := req.Header.Get("Authorization"); authHeader != "" { - // If not found, check for Authorization header token = strings.TrimPrefix(authHeader, "Bearer ") } else if AzureOpenAIToken != "" { - // If neither is present, use the AzureOpenAIToken if set token = AzureOpenAIToken } else if envApiKey := os.Getenv("AZURE_OPENAI_API_KEY"); envApiKey != "" { - // As a last resort, check for API key in environment variable token = envApiKey } if token != "" { - // Set the api-key header with the found token req.Header.Set("api-key", token) - // Remove the Authorization header to avoid conflicts req.Header.Del("Authorization") } else { log.Println("Warning: No authentication token found") } } -// Update the makeDirector function to handle the new endpoint structure func makeDirector(remote *url.URL) func(*http.Request) { return func(req *http.Request) { - - // Get model and map it to deployment model := getModelFromRequest(req) deployment := GetDeploymentByModel(model) - // Handle token HandleToken(req) - // Set the Host, Scheme, Path, and RawPath of the request originURL := req.URL.String() req.Host = remote.Host req.URL.Scheme = remote.Scheme req.URL.Host = remote.Host - // Handle different endpoints switch { case strings.HasPrefix(req.URL.Path, "/v1/chat/completions"): req.URL.Path = path.Join("/openai/deployments", deployment, "chat/completions") @@ -185,7 +204,6 @@ func makeDirector(remote *url.URL) func(*http.Request) { req.URL.RawPath = req.URL.EscapedPath() - // Add logging for new parameters if req.Body != nil { var requestBody map[string]interface{} bodyBytes, _ := io.ReadAll(req.Body) @@ -198,17 +216,14 @@ func makeDirector(remote *url.URL) func(*http.Request) { } } - // Restore the body to the request req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) } - // Add the api-version query parameter query := req.URL.Query() query.Add("api-version", AzureOpenAIAPIVersion) req.URL.RawQuery = query.Encode() log.Printf("Proxying request [%s] %s -> %s", model, originURL, req.URL.String()) - // log.Printf("Sanitized Request Headers: %v", sanitizeHeaders(req.Header)) } } @@ -219,7 +234,6 @@ func modifyResponse(res *http.Response) error { res.Body = io.NopCloser(bytes.NewBuffer(body)) } - // Handle streaming responses if res.Header.Get("Content-Type") == "text/event-stream" { res.Header.Set("X-Accel-Buffering", "no") } diff --git a/pkg/azure/types.go b/pkg/azure/types.go index 70d68c8..dca0462 100644 --- a/pkg/azure/types.go +++ 
b/pkg/azure/types.go @@ -5,25 +5,43 @@ type ListModelResponse struct { Data []Model `json:"data"` } +// AzureConfig represents the configuration for Azure OpenAI +type AzureConfig struct { + APIVersion string `json:"api_version"` + Endpoint string `json:"endpoint"` + Token string `json:"token"` + ModelMapperMode string `json:"model_mapper_mode"` + AIStudioDeploymentsRaw string `json:"ai_studio_deployments_raw"` +} + +// AzureAIStudioDeployment represents a deployment in Azure AI Studio +type AzureAIStudioDeployment struct { + ModelName string `json:"model_name"` + DeploymentName string `json:"deployment_name"` + Region string `json:"region"` +} + +// Update Model struct to include new fields if necessary type Model struct { - ID string `json:"id"` - Object string `json:"object"` - CreatedAt int64 `json:"created_at"` - Capabilities Capabilities `json:"capabilities"` - LifecycleStatus string `json:"lifecycle_status"` - Status string `json:"status"` - Deprecation Deprecation `json:"deprecation,omitempty"` - FineTune string `json:"fine_tune,omitempty"` - Created int `json:"created"` - OwnedBy string `json:"owned_by"` - Permission []ModelPermission `json:"permission"` - Root string `json:"root"` - Parent any `json:"parent"` + ID string `json:"id"` + Object string `json:"object"` + CreatedAt int64 `json:"created_at"` + Capabilities Capabilities `json:"capabilities"` + LifecycleStatus string `json:"lifecycle_status"` + Status string `json:"status"` + Deprecation Deprecation `json:"deprecation,omitempty"` + FineTune string `json:"fine_tune,omitempty"` + Created int `json:"created"` + OwnedBy string `json:"owned_by"` + Permission []ModelPermission `json:"permission"` + Root string `json:"root"` + Parent any `json:"parent"` + // Add any new fields from the latest Azure OpenAI API here } type ModelList struct { - Object string `json:"object"` - Data []Model `json:"data"` + Object string `json:"object"` + Data []Model `json:"data"` } type ModelPermission struct { @@ -59,10 +77,10 @@ type ListDeployedModelsResponse struct { // JSONModeRequest represents a request with JSON mode enabled type JSONModeRequest struct { - Model string `json:"model"` - Messages []ChatMessage `json:"messages"` - Temperature float64 `json:"temperature,omitempty"` - MaxTokens int `json:"max_tokens,omitempty"` + Model string `json:"model"` + Messages []ChatMessage `json:"messages"` + Temperature float64 `json:"temperature,omitempty"` + MaxTokens int `json:"max_tokens,omitempty"` ResponseFormat *ResponseFormat `json:"response_format,omitempty"` } @@ -79,12 +97,12 @@ type ChatMessage struct { // JSONModeResponse represents a response when JSON mode is enabled type JSONModeResponse struct { - ID string `json:"id"` - Object string `json:"object"` - Created int64 `json:"created"` - Model string `json:"model"` + ID string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + Model string `json:"model"` Choices []JSONChoice `json:"choices"` - Usage Usage `json:"usage"` + Usage Usage `json:"usage"` } // JSONChoice represents a choice in the JSON mode response @@ -109,15 +127,15 @@ type DeploymentCapability struct { // DeploymentDetails represents detailed information about a deployment type DeploymentDetails struct { - ID string `json:"id"` - ModelID string `json:"model"` - OwnerID string `json:"owner"` - Status string `json:"status"` - CreatedAt string `json:"created_at"` - UpdatedAt string `json:"updated_at"` - Capabilities []DeploymentCapability `json:"capabilities"` - ScaleSettings ScaleSettings 
`json:"scale_settings"` - RaiPolicy string `json:"rai_policy"` + ID string `json:"id"` + ModelID string `json:"model"` + OwnerID string `json:"owner"` + Status string `json:"status"` + CreatedAt string `json:"created_at"` + UpdatedAt string `json:"updated_at"` + Capabilities []DeploymentCapability `json:"capabilities"` + ScaleSettings ScaleSettings `json:"scale_settings"` + RaiPolicy string `json:"rai_policy"` } // ScaleSettings represents the scale settings for a deployment @@ -165,8 +183,8 @@ type ImageGenerationRequest struct { // ImageGenerationResponse represents the response structure from image generation type ImageGenerationResponse struct { - Created int64 `json:"created"` - Data []ImageData `json:"data"` + Created int64 `json:"created"` + Data []ImageData `json:"data"` } // ImageData represents data of a generated image @@ -176,8 +194,8 @@ type ImageData struct { // AudioTranscriptionRequest represents the request structure for audio transcription type AudioTranscriptionRequest struct { - Model string `json:"model"` - Audio []byte `json:"audio"` + Model string `json:"model"` + Audio []byte `json:"audio"` Language string `json:"language,omitempty"` } @@ -186,15 +204,19 @@ type AudioTranscriptionResponse struct { Text string `json:"text"` } +// Update if there are any changes to the capabilities type Capabilities struct { - ChatCompletion bool `json:"chat_completion"` - Completion bool `json:"completion"` - Embeddings bool `json:"embeddings"` - FineTune bool `json:"fine_tune"` - Inference bool `json:"inference"` + ChatCompletion bool `json:"chat_completion"` + Completion bool `json:"completion"` + Embeddings bool `json:"embeddings"` + FineTune bool `json:"fine_tune"` + Inference bool `json:"inference"` + // Add any new capabilities here } +// Update if there are any changes to the deprecation structure type Deprecation struct { - FineTune int `json:"fine_tune,omitempty"` - Inference int `json:"inference,omitempty"` + FineTune int `json:"fine_tune,omitempty"` + Inference int `json:"inference,omitempty"` + // Add any new deprecation fields here } From 677cac00347a344ee0b3951560319fba364c28f3 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:29:55 +0200 Subject: [PATCH 03/33] chore: Update Dockerfile to handle missing .env file during build --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 886665e..e4bf35d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,6 @@ RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o azure-oai-proxy . FROM gcr.io/distroless/base-debian12 COPY --from=builder /build/azure-oai-proxy / -COPY --from=builder /build/.env /.env +COPY --from=builder /build/.env /.env* 2>/dev/null || touch /.env EXPOSE 11437 ENTRYPOINT ["/azure-oai-proxy"] \ No newline at end of file From f33da9ef7da017e9ea57f400706dec8bd279d899 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:38:39 +0200 Subject: [PATCH 04/33] chore: Update Dockerfile to handle missing .env file during build --- Dockerfile | 8 +++++--- example.env | 7 +++++++ 2 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 example.env diff --git a/Dockerfile b/Dockerfile index e4bf35d..f938b16 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,13 @@ -FROM golang:1.22.5 AS builder +FROM golang:1.18 AS builder WORKDIR /build COPY . . 
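# Note: the app treats .env as optional at runtime; godotenv.Load() (wired in
# by patch 02) merely logs "No .env file found" when it is absent, so the
# image does not strictly need to ship one.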
-RUN go get github.com/joho/godotenv RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o azure-oai-proxy . +RUN echo "#!/bin/sh\ncp /build/.env /app/.env 2>/dev/null || touch /app/.env" > /handle-env.sh +RUN chmod +x /handle-env.sh + FROM gcr.io/distroless/base-debian12 COPY --from=builder /build/azure-oai-proxy / -COPY --from=builder /build/.env /.env* 2>/dev/null || touch /.env +COPY --from=builder /app/.env /.env EXPOSE 11437 ENTRYPOINT ["/azure-oai-proxy"] \ No newline at end of file diff --git a/example.env b/example.env new file mode 100644 index 0000000..fa9e655 --- /dev/null +++ b/example.env @@ -0,0 +1,7 @@ +AZURE_OPENAI_APIVERSION=2024-06-01 +AZURE_OPENAI_ENDPOINT=https://your-azure-openai-resource.openai.azure.com/ +AZURE_OPENAI_API_KEY=your-azure-openai-api-key +AZURE_OPENAI_MODEL_MAPPER=gpt-3.5-turbo=gpt-35-turbo,gpt-4=gpt-4-0613 +AZURE_AI_STUDIO_DEPLOYMENTS=mistral-large=Mistral-large2:swedencentral,llama-3=Meta-Llama-31-405B-Instruct:northcentralus +AZURE_OPENAI_PROXY_ADDRESS=0.0.0.0:11437 +AZURE_OPENAI_PROXY_MODE=azure \ No newline at end of file From 2167f204bbed11457a8f8721379e017e07c64d68 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:42:40 +0200 Subject: [PATCH 05/33] chore: Update Dockerfile to use golang 1.22.5 and handle missing .env file during build --- Dockerfile | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index f938b16..5f97aea 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,11 @@ -FROM golang:1.18 AS builder +FROM golang:1.22.5 AS builder WORKDIR /build COPY . . +RUN go get github.com/joho/godotenv RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o azure-oai-proxy . -RUN echo "#!/bin/sh\ncp /build/.env /app/.env 2>/dev/null || touch /app/.env" > /handle-env.sh -RUN chmod +x /handle-env.sh - FROM gcr.io/distroless/base-debian12 COPY --from=builder /build/azure-oai-proxy / -COPY --from=builder /app/.env /.env +COPY --from=builder /build/.env / 2>/dev/null || true EXPOSE 11437 ENTRYPOINT ["/azure-oai-proxy"] \ No newline at end of file From 5361d6bc5c8fc1dd208043548f6347ed4cb20e9f Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:45:58 +0200 Subject: [PATCH 06/33] chore: Update Dockerfile to handle missing .env file during build --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 5f97aea..6d13ebf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,6 @@ RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o azure-oai-proxy . FROM gcr.io/distroless/base-debian12 COPY --from=builder /build/azure-oai-proxy / -COPY --from=builder /build/.env / 2>/dev/null || true +RUN ["/busybox/sh", "-c", "touch /.env"] EXPOSE 11437 ENTRYPOINT ["/azure-oai-proxy"] \ No newline at end of file From f03d593489b6274dfbff9ad5df0f2f52a4d52586 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 15:52:54 +0200 Subject: [PATCH 07/33] chore: Update Dockerfile to handle missing .env file during build --- Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 6d13ebf..0d0f2bd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,5 @@ RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o azure-oai-proxy . 
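# Note: gcr.io/distroless/base-debian12 (non-debug) ships no /busybox/sh for
# the RUN below to exec, so the touch workaround is dropped; godotenv treats a
# missing .env as non-fatal anyway.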
FROM gcr.io/distroless/base-debian12 COPY --from=builder /build/azure-oai-proxy / -RUN ["/busybox/sh", "-c", "touch /.env"] EXPOSE 11437 ENTRYPOINT ["/azure-oai-proxy"] \ No newline at end of file From 3b98ce41142f5bc2c6d38edb9453de923e6dd653 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 16:22:55 +0200 Subject: [PATCH 08/33] chore: Initialize ServerlessDeploymentKeys map and handle serverless deployments --- pkg/azure/proxy.go | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index bcd1be0..fafb322 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -49,6 +49,7 @@ var ( "text-embedding-3-large": "text-embedding-3-large-1", } AzureAIStudioDeployments = make(map[string]string) + ServerlessDeploymentKeys = make(map[string]string) fallbackModelMapper = regexp.MustCompile(`[.:]`) ) @@ -85,6 +86,16 @@ func init() { for k, v := range AzureAIStudioDeployments { log.Printf("loading azure ai studio deployment: %s -> %s", k, v) } + + // Initialize ServerlessDeploymentKeys + for key, value := range os.Environ() { + if strings.HasPrefix(key, "AZURE_OPENAI_KEY_") { + deploymentName := strings.TrimPrefix(key, "AZURE_OPENAI_KEY_") + ServerlessDeploymentKeys[strings.ToLower(deploymentName)] = value + } + } + + log.Printf("Loaded %d serverless deployment keys", len(ServerlessDeploymentKeys)) } func handleModelMapper() { @@ -147,6 +158,16 @@ func sanitizeHeaders(headers http.Header) http.Header { } func HandleToken(req *http.Request) { + deployment := extractDeploymentFromPath(req.URL.Path) + + // Check if it's a serverless deployment + if apiKey, ok := ServerlessDeploymentKeys[strings.ToLower(deployment)]; ok { + req.Header.Set("api-key", apiKey) + req.Header.Del("Authorization") + return + } + + // Existing token handling logic var token string if apiKey := req.Header.Get("api-key"); apiKey != "" { @@ -163,8 +184,18 @@ func HandleToken(req *http.Request) { req.Header.Set("api-key", token) req.Header.Del("Authorization") } else { - log.Println("Warning: No authentication token found") + log.Println("Warning: No authentication token found for deployment:", deployment) + } +} + +func extractDeploymentFromPath(path string) string { + parts := strings.Split(path, "/") + for i, part := range parts { + if part == "deployments" && i+1 < len(parts) { + return parts[i+1] + } } + return "" } func makeDirector(remote *url.URL) func(*http.Request) { From e4b6d56904163624aaee165280f73aa38e642d2a Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 16:31:40 +0200 Subject: [PATCH 09/33] chore: Refactor HandleToken function to handle serverless deployments and improve token handling logic --- pkg/azure/proxy.go | 74 ++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index fafb322..fd31deb 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -88,16 +88,51 @@ func init() { } // Initialize ServerlessDeploymentKeys - for key, value := range os.Environ() { - if strings.HasPrefix(key, "AZURE_OPENAI_KEY_") { - deploymentName := strings.TrimPrefix(key, "AZURE_OPENAI_KEY_") - ServerlessDeploymentKeys[strings.ToLower(deploymentName)] = value + for _, env := range os.Environ() { + parts := strings.SplitN(env, "=", 2) + if len(parts) == 2 { + key, value := parts[0], parts[1] + if 
strings.HasPrefix(key, "AZURE_OPENAI_KEY_") { + deploymentName := strings.TrimPrefix(key, "AZURE_OPENAI_KEY_") + ServerlessDeploymentKeys[strings.ToLower(deploymentName)] = value + } } } log.Printf("Loaded %d serverless deployment keys", len(ServerlessDeploymentKeys)) } +func HandleToken(req *http.Request) { + deployment := extractDeploymentFromPath(req.URL.Path) + + // Check if it's a serverless deployment + if apiKey, ok := ServerlessDeploymentKeys[strings.ToLower(deployment)]; ok { + req.Header.Set("api-key", apiKey) + req.Header.Del("Authorization") + return + } + + // Existing token handling logic + var token string + + if apiKey := req.Header.Get("api-key"); apiKey != "" { + token = apiKey + } else if authHeader := req.Header.Get("Authorization"); authHeader != "" { + token = strings.TrimPrefix(authHeader, "Bearer ") + } else if AzureOpenAIToken != "" { + token = AzureOpenAIToken + } else if envApiKey := os.Getenv("AZURE_OPENAI_API_KEY"); envApiKey != "" { + token = envApiKey + } + + if token != "" { + req.Header.Set("api-key", token) + req.Header.Del("Authorization") + } else { + log.Println("Warning: No authentication token found for deployment:", deployment) + } +} + func handleModelMapper() { overrideMode := strings.ToLower(os.Getenv("AZURE_OPENAI_MODEL_MAPPER_MODE")) == "override" @@ -157,37 +192,6 @@ func sanitizeHeaders(headers http.Header) http.Header { return sanitized } -func HandleToken(req *http.Request) { - deployment := extractDeploymentFromPath(req.URL.Path) - - // Check if it's a serverless deployment - if apiKey, ok := ServerlessDeploymentKeys[strings.ToLower(deployment)]; ok { - req.Header.Set("api-key", apiKey) - req.Header.Del("Authorization") - return - } - - // Existing token handling logic - var token string - - if apiKey := req.Header.Get("api-key"); apiKey != "" { - token = apiKey - } else if authHeader := req.Header.Get("Authorization"); authHeader != "" { - token = strings.TrimPrefix(authHeader, "Bearer ") - } else if AzureOpenAIToken != "" { - token = AzureOpenAIToken - } else if envApiKey := os.Getenv("AZURE_OPENAI_API_KEY"); envApiKey != "" { - token = envApiKey - } - - if token != "" { - req.Header.Set("api-key", token) - req.Header.Del("Authorization") - } else { - log.Println("Warning: No authentication token found for deployment:", deployment) - } -} - func extractDeploymentFromPath(path string) string { parts := strings.Split(path, "/") for i, part := range parts { From 4d8a5cd336f5a824bdf3be9da53e96622d5b145b Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 16:56:33 +0200 Subject: [PATCH 10/33] chore: Add serverless deployments to the models list --- main.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/main.go b/main.go index 2fbfaed..2ea0048 100644 --- a/main.go +++ b/main.go @@ -148,6 +148,21 @@ func handleGetModels(c *gin.Context) { c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to fetch deployed models"}) return } + + // Add serverless deployments to the models list + for deploymentName := range azure.ServerlessDeploymentKeys { + models = append(models, Model{ + ID: deploymentName, + Object: "model", + Capabilities: Capabilities{ + Completion: true, + ChatCompletion: true, + }, + LifecycleStatus: "active", + Status: "ready", + }) + } + result := ModelList{ Object: "list", Data: models, From fa089a8f24aafe7a092d88bd7b2778bcffb5ed42 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 
Jul 2024 17:15:11 +0200 Subject: [PATCH 11/33] Added a ServerlessDeploymentInfo map to store information about serverless deployments. Modified the init() function to parse the AZURE_AI_STUDIO_DEPLOYMENTS environment variable and populate both AzureAIStudioDeployments and ServerlessDeploymentInfo. Updated HandleToken() to use the serverless deployment key when appropriate. Modified makeDirector() to construct the correct URL for serverless deployments. --- pkg/azure/proxy.go | 53 ++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index fd31deb..6d416d7 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -3,6 +3,7 @@ package azure import ( "bytes" "encoding/json" + "fmt" "io" "log" "net/http" @@ -49,10 +50,16 @@ var ( "text-embedding-3-large": "text-embedding-3-large-1", } AzureAIStudioDeployments = make(map[string]string) - ServerlessDeploymentKeys = make(map[string]string) + ServerlessDeploymentInfo = make(map[string]ServerlessDeployment) fallbackModelMapper = regexp.MustCompile(`[.:]`) ) +type ServerlessDeployment struct { + Name string + Region string + Key string +} + func init() { if v := os.Getenv("AZURE_OPENAI_APIVERSION"); v != "" { AzureOpenAIAPIVersion = v @@ -67,12 +74,21 @@ func init() { for _, pair := range strings.Split(v, ",") { info := strings.Split(pair, "=") if len(info) == 2 { - AzureAIStudioDeployments[info[0]] = info[1] + deploymentInfo := strings.Split(info[1], ":") + if len(deploymentInfo) == 2 { + AzureAIStudioDeployments[info[0]] = deploymentInfo[0] + ServerlessDeploymentInfo[strings.ToLower(info[0])] = ServerlessDeployment{ + Name: deploymentInfo[0], + Region: deploymentInfo[1], + Key: os.Getenv("AZURE_OPENAI_KEY_" + strings.ToUpper(info[0])), + } + } } else { log.Printf("error parsing AZURE_AI_STUDIO_DEPLOYMENTS, invalid value %s", pair) } } } + if v := os.Getenv("AZURE_OPENAI_TOKEN"); v != "" { AzureOpenAIToken = v log.Printf("loading azure api token from env") @@ -86,28 +102,15 @@ func init() { for k, v := range AzureAIStudioDeployments { log.Printf("loading azure ai studio deployment: %s -> %s", k, v) } - - // Initialize ServerlessDeploymentKeys - for _, env := range os.Environ() { - parts := strings.SplitN(env, "=", 2) - if len(parts) == 2 { - key, value := parts[0], parts[1] - if strings.HasPrefix(key, "AZURE_OPENAI_KEY_") { - deploymentName := strings.TrimPrefix(key, "AZURE_OPENAI_KEY_") - ServerlessDeploymentKeys[strings.ToLower(deploymentName)] = value - } - } - } - - log.Printf("Loaded %d serverless deployment keys", len(ServerlessDeploymentKeys)) + log.Printf("Loaded %d serverless deployment infos", len(ServerlessDeploymentInfo)) } func HandleToken(req *http.Request) { deployment := extractDeploymentFromPath(req.URL.Path) // Check if it's a serverless deployment - if apiKey, ok := ServerlessDeploymentKeys[strings.ToLower(deployment)]; ok { - req.Header.Set("api-key", apiKey) + if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { + req.Header.Set("api-key", info.Key) req.Header.Del("Authorization") return } @@ -210,9 +213,17 @@ func makeDirector(remote *url.URL) func(*http.Request) { HandleToken(req) originURL := req.URL.String() - req.Host = remote.Host - req.URL.Scheme = remote.Scheme - req.URL.Host = remote.Host + + // Check if it's a serverless deployment + if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { + req.URL.Scheme = "https" + req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", 
info.Name, info.Region) + req.Host = req.URL.Host + } else { + req.Host = remote.Host + req.URL.Scheme = remote.Scheme + req.URL.Host = remote.Host + } switch { case strings.HasPrefix(req.URL.Path, "/v1/chat/completions"): From 9233ce36ec732cde909bb94fcf47709f780bbcc4 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 17:20:12 +0200 Subject: [PATCH 12/33] Removed the parsing of AZURE_AI_STUDIO_DEPLOYMENTS from main.go, now handled in azure.init() . Updated handleGetModels function to include serverless deployments: It now iterates over azure.ServerlessDeploymentInfo to add serverless deployments to the list of models. Serverless deployments have base: (Completion, ChatCompletion, and Inference). --- main.go | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/main.go b/main.go index 2ea0048..d685cb6 100644 --- a/main.go +++ b/main.go @@ -50,18 +50,6 @@ type Deprecation struct { Inference int64 `json:"inference"` } -func init() { - gin.SetMode(gin.ReleaseMode) - if v := os.Getenv("AZURE_OPENAI_PROXY_ADDRESS"); v != "" { - Address = v - } - if v := os.Getenv("AZURE_OPENAI_PROXY_MODE"); v != "" { - ProxyMode = v - } - log.Printf("loading azure openai proxy address: %s", Address) - log.Printf("loading azure openai proxy mode: %s", ProxyMode) -} - func init() { // Load .env file if it exists if err := godotenv.Load(); err != nil { @@ -87,16 +75,6 @@ func init() { } } } - - // Load Azure AI Studio Deployments - if v := os.Getenv("AZURE_AI_STUDIO_DEPLOYMENTS"); v != "" { - for _, pair := range strings.Split(v, ",") { - info := strings.Split(pair, "=") - if len(info) == 2 { - azure.AzureAIStudioDeployments[info[0]] = info[1] - } - } - } } func main() { @@ -150,13 +128,14 @@ func handleGetModels(c *gin.Context) { } // Add serverless deployments to the models list - for deploymentName := range azure.ServerlessDeploymentKeys { + for deploymentName, info := range azure.ServerlessDeploymentInfo { models = append(models, Model{ ID: deploymentName, Object: "model", Capabilities: Capabilities{ Completion: true, ChatCompletion: true, + Inference: true, }, LifecycleStatus: "active", Status: "ready", From b42e8ce92a7bca1e0799feab1bfa59a086724720 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 17:23:24 +0200 Subject: [PATCH 13/33] chore: Update go.mod to include github.com/joho/godotenv v1.5.1 --- go.mod | 1 + go.sum | 2 ++ 2 files changed, 3 insertions(+) diff --git a/go.mod b/go.mod index 0ff66a5..682f70a 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.22.4 require ( github.com/gin-gonic/gin v1.10.0 + github.com/joho/godotenv v1.5.1 github.com/tidwall/gjson v1.17.1 ) diff --git a/go.sum b/go.sum index a4ff04c..4c2d16e 100644 --- a/go.sum +++ b/go.sum @@ -28,6 +28,8 @@ github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MG github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= +github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod 
h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= From e3d29f4a03e1fd3c10ebd69c3da361a6d142a6de Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 17:28:14 +0200 Subject: [PATCH 14/33] remove unused import _info --- main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.go b/main.go index d685cb6..8a8ec8b 100644 --- a/main.go +++ b/main.go @@ -128,7 +128,7 @@ func handleGetModels(c *gin.Context) { } // Add serverless deployments to the models list - for deploymentName, info := range azure.ServerlessDeploymentInfo { + for deploymentName := range azure.ServerlessDeploymentInfo { models = append(models, Model{ ID: deploymentName, Object: "model", From 4f5c828fbea663207254eda2929c3e727bef2bf4 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 17:37:41 +0200 Subject: [PATCH 15/33] chore: Refactor HandleToken function to handle serverless auth and improve token handling logic --- pkg/azure/proxy.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 6d416d7..80022fb 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -110,12 +110,12 @@ func HandleToken(req *http.Request) { // Check if it's a serverless deployment if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { - req.Header.Set("api-key", info.Key) - req.Header.Del("Authorization") + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) + req.Header.Del("api-key") return } - // Existing token handling logic + // Existing token handling logic for non-serverless deployments var token string if apiKey := req.Header.Get("api-key"); apiKey != "" { From cb1279cc31f6318bfb15fa6928c1bb0b584f73c6 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 17:55:48 +0200 Subject: [PATCH 16/33] chore: Refactor HandleToken function to handle serverless deployments and improve token handling logic --- pkg/azure/proxy.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 80022fb..94942cc 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -108,14 +108,13 @@ func init() { func HandleToken(req *http.Request) { deployment := extractDeploymentFromPath(req.URL.Path) - // Check if it's a serverless deployment if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) req.Header.Del("api-key") + log.Printf("Using serverless deployment authentication for %s", deployment) return } - // Existing token handling logic for non-serverless deployments var token string if apiKey := req.Header.Get("api-key"); apiKey != "" { @@ -131,8 +130,9 @@ func HandleToken(req *http.Request) { if token != "" { req.Header.Set("api-key", token) req.Header.Del("Authorization") + log.Printf("Using regular Azure OpenAI authentication for %s", deployment) } else { - log.Println("Warning: No authentication token found for deployment:", deployment) + log.Printf("Warning: No authentication token found for deployment: %s", deployment) } } From 7dd79bd628bf3d39082efe94a9bb5531a54676bf Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 18:15:01 +0200 Subject: [PATCH 
17/33] handle serverless deployments url correctly --- pkg/azure/proxy.go | 48 +++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 94942cc..59e5c67 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -219,33 +219,37 @@ func makeDirector(remote *url.URL) func(*http.Request) { req.URL.Scheme = "https" req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", info.Name, info.Region) req.Host = req.URL.Host + + // For serverless, keep the original path with '/v1' prefix + req.URL.Path = req.URL.Path } else { req.Host = remote.Host req.URL.Scheme = remote.Scheme req.URL.Host = remote.Host - } - switch { - case strings.HasPrefix(req.URL.Path, "/v1/chat/completions"): - req.URL.Path = path.Join("/openai/deployments", deployment, "chat/completions") - case strings.HasPrefix(req.URL.Path, "/v1/completions"): - req.URL.Path = path.Join("/openai/deployments", deployment, "completions") - case strings.HasPrefix(req.URL.Path, "/v1/embeddings"): - req.URL.Path = path.Join("/openai/deployments", deployment, "embeddings") - case strings.HasPrefix(req.URL.Path, "/v1/images/generations"): - req.URL.Path = path.Join("/openai/deployments", deployment, "images/generations") - case strings.HasPrefix(req.URL.Path, "/v1/fine_tunes"): - req.URL.Path = path.Join("/openai/deployments", deployment, "fine-tunes") - case strings.HasPrefix(req.URL.Path, "/v1/files"): - req.URL.Path = path.Join("/openai/deployments", deployment, "files") - case strings.HasPrefix(req.URL.Path, "/v1/audio/speech"): - req.URL.Path = path.Join("/openai/deployments", deployment, "audio/speech") - case strings.HasPrefix(req.URL.Path, "/v1/audio/transcriptions"): - req.URL.Path = path.Join("/openai/deployments", deployment, "transcriptions") - case strings.HasPrefix(req.URL.Path, "/v1/audio/translations"): - req.URL.Path = path.Join("/openai/deployments", deployment, "translations") - default: - req.URL.Path = path.Join("/openai/deployments", deployment, strings.TrimPrefix(req.URL.Path, "/v1/")) + // For regular Azure OpenAI, construct the path + switch { + case strings.HasPrefix(req.URL.Path, "/v1/chat/completions"): + req.URL.Path = path.Join("/openai/deployments", deployment, "chat/completions") + case strings.HasPrefix(req.URL.Path, "/v1/completions"): + req.URL.Path = path.Join("/openai/deployments", deployment, "completions") + case strings.HasPrefix(req.URL.Path, "/v1/embeddings"): + req.URL.Path = path.Join("/openai/deployments", deployment, "embeddings") + case strings.HasPrefix(req.URL.Path, "/v1/images/generations"): + req.URL.Path = path.Join("/openai/deployments", deployment, "images/generations") + case strings.HasPrefix(req.URL.Path, "/v1/fine_tunes"): + req.URL.Path = path.Join("/openai/deployments", deployment, "fine-tunes") + case strings.HasPrefix(req.URL.Path, "/v1/files"): + req.URL.Path = path.Join("/openai/deployments", deployment, "files") + case strings.HasPrefix(req.URL.Path, "/v1/audio/speech"): + req.URL.Path = path.Join("/openai/deployments", deployment, "audio/speech") + case strings.HasPrefix(req.URL.Path, "/v1/audio/transcriptions"): + req.URL.Path = path.Join("/openai/deployments", deployment, "transcriptions") + case strings.HasPrefix(req.URL.Path, "/v1/audio/translations"): + req.URL.Path = path.Join("/openai/deployments", deployment, "translations") + default: + req.URL.Path = path.Join("/openai/deployments", deployment, strings.TrimPrefix(req.URL.Path, "/v1/")) + } } req.URL.RawPath = 
req.URL.EscapedPath() From fbde5f450fe93c55a806695acb4cf9bbbf395217 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 18:24:30 +0200 Subject: [PATCH 18/33] introduce a "isServerless" boolean flag to track whether the current request is for a serverless deployment. For serverless deployments: set "isServerless" to true. We keep the original path, including the '/v1' prefix. does not modify the query parameters. For regular Azure OpenAI deployments: Add the api-version query parameter if "isServerless" is false. --- pkg/azure/proxy.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 59e5c67..793130b 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -214,8 +214,10 @@ func makeDirector(remote *url.URL) func(*http.Request) { originURL := req.URL.String() + isServerless := false // Check if it's a serverless deployment if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { + isServerless = true req.URL.Scheme = "https" req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", info.Name, info.Region) req.Host = req.URL.Host @@ -269,9 +271,12 @@ func makeDirector(remote *url.URL) func(*http.Request) { req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) } - query := req.URL.Query() - query.Add("api-version", AzureOpenAIAPIVersion) - req.URL.RawQuery = query.Encode() + // Only add api-version for non-serverless deployments + if !isServerless { + query := req.URL.Query() + query.Add("api-version", AzureOpenAIAPIVersion) + req.URL.RawQuery = query.Encode() + } log.Printf("Proxying request [%s] %s -> %s", model, originURL, req.URL.String()) } From 3d320b8d212d76fb4b72494210685e02294256f9 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 18:38:24 +0200 Subject: [PATCH 19/33] Introduce "isServerless" flag to track serverless deployments and handle URL construction --- pkg/azure/proxy.go | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 793130b..d549ae7 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -214,16 +214,12 @@ func makeDirector(remote *url.URL) func(*http.Request) { originURL := req.URL.String() - isServerless := false - // Check if it's a serverless deployment if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { - isServerless = true req.URL.Scheme = "https" req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", info.Name, info.Region) req.Host = req.URL.Host - // For serverless, keep the original path with '/v1' prefix - req.URL.Path = req.URL.Path + log.Printf("Using serverless deployment for %s", deployment) } else { req.Host = remote.Host req.URL.Scheme = remote.Scheme @@ -252,6 +248,11 @@ func makeDirector(remote *url.URL) func(*http.Request) { default: req.URL.Path = path.Join("/openai/deployments", deployment, strings.TrimPrefix(req.URL.Path, "/v1/")) } + + // Only add api-version for non-serverless deployments + query := req.URL.Query() + query.Add("api-version", AzureOpenAIAPIVersion) + req.URL.RawQuery = query.Encode() } req.URL.RawPath = req.URL.EscapedPath() @@ -271,13 +272,6 @@ func makeDirector(remote *url.URL) func(*http.Request) { req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) } - // Only add api-version for non-serverless deployments - if !isServerless { - query := req.URL.Query() - query.Add("api-version", 
AzureOpenAIAPIVersion) - req.URL.RawQuery = query.Encode() - } - log.Printf("Proxying request [%s] %s -> %s", model, originURL, req.URL.String()) } } @@ -289,6 +283,7 @@ func modifyResponse(res *http.Response) error { res.Body = io.NopCloser(bytes.NewBuffer(body)) } + // Handle streaming responses if res.Header.Get("Content-Type") == "text/event-stream" { res.Header.Set("X-Accel-Buffering", "no") } From 1aa7eaebe94d5e63fbe5044fa465628768c2519e Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 18:41:35 +0200 Subject: [PATCH 20/33] remove cargo --- .github/dependabot.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index bd4568a..6673b46 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -13,7 +13,3 @@ updates: directory: "/" # Location of package manifests schedule: interval: "weekly" - - package-ecosystem: "cargo" # See documentation for possible values - directory: "/" # Location of package manifests - schedule: - interval: "weekly" From 3651c8f11004c06a2309088c04636c83947f3fed Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 18:59:33 +0200 Subject: [PATCH 21/33] azoa --- pkg/azure/proxy.go | 51 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index d549ae7..c712d67 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -105,11 +105,46 @@ func init() { log.Printf("Loaded %d serverless deployment infos", len(ServerlessDeploymentInfo)) } -func HandleToken(req *http.Request) { - deployment := extractDeploymentFromPath(req.URL.Path) +func proxyRequest(w http.ResponseWriter, r *http.Request) { + client := &http.Client{} + body, err := ioutil.ReadAll(r.Body) + if err != nil { + http.Error(w, "Failed to read request body", http.StatusInternalServerError) + return + } + + req, err := http.NewRequest("POST", "https://Mistral-large2.swedencentral.models.ai.azure.com/v1/chat/completions", strings.NewReader(string(body))) + if err != nil { + http.Error(w, "Failed to create request", http.StatusInternalServerError) + return + } + + // Forward headers from the original request + for name, values := range r.Header { + for _, value := range values { + req.Header.Add(name, value) + } + } + + handleToken(req, r.URL.Path) + + resp, err := client.Do(req) + if err != nil { + http.Error(w, "Failed to make request", http.StatusInternalServerError) + return + } + defer resp.Body.Close() + + respBody, err := ioutil.ReadAll(resp.Body) + if err != nil { + http.Error(w, "Failed to read response body", http.StatusInternalServerError) + return + } + + w.WriteHeader(resp.Sta// Removed duplicate function declarationctDeploymentFromPath(path) if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { - req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) + req.Header.Set("Authorization", "Bearer "+info.Key) req.Header.Del("api-key") log.Printf("Using serverless deployment authentication for %s", deployment) return @@ -136,6 +171,16 @@ func HandleToken(req *http.Request) { } } +func extractDeploymentFromPath(path string) string { + parts := strings.Split(path, "/") + for i, part := range parts { + if part == "deployments" && i+1 < len(parts) { + return parts[i+1] + } + } + return "" +} + func handleModelMapper() { overrideMode := 
strings.ToLower(os.Getenv("AZURE_OPENAI_MODEL_MAPPER_MODE")) == "override" From 0b2f7293165130bb7643433b9de1b75d9a115391 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 19:21:23 +0200 Subject: [PATCH 22/33] Refactor proxyRequest function to handle response writing and error handling --- pkg/azure/proxy.go | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index c712d67..d814052 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "io" + "io/ioutil" "log" "net/http" "net/http/httputil" @@ -141,7 +142,15 @@ func proxyRequest(w http.ResponseWriter, r *http.Request) { return } - w.WriteHeader(resp.Sta// Removed duplicate function declarationctDeploymentFromPath(path) + w.WriteHeader(resp.StatusCode) + _, err = w.Write(respBody) + if err != nil { + http.Error(w, "Failed to write response", http.StatusInternalServerError) + } +} + +func handleToken(req *http.Request, path string) { + deployment := extractDeploymentFromPath(path) if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { req.Header.Set("Authorization", "Bearer "+info.Key) @@ -240,22 +249,12 @@ func sanitizeHeaders(headers http.Header) http.Header { return sanitized } -func extractDeploymentFromPath(path string) string { - parts := strings.Split(path, "/") - for i, part := range parts { - if part == "deployments" && i+1 < len(parts) { - return parts[i+1] - } - } - return "" -} - func makeDirector(remote *url.URL) func(*http.Request) { return func(req *http.Request) { model := getModelFromRequest(req) deployment := GetDeploymentByModel(model) - HandleToken(req) + handleToken(req, req.URL.Path) originURL := req.URL.String() From 93f4b9fdcda99a42a1f240b451556bccc7ce5083 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 19:27:30 +0200 Subject: [PATCH 23/33] fix: azure proxy serverless api auth --- pkg/azure/proxy.go | 75 ++++++++++------------------------------------ 1 file changed, 15 insertions(+), 60 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index d814052..cd2caf5 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -5,7 +5,6 @@ import ( "encoding/json" "fmt" "io" - "io/ioutil" "log" "net/http" "net/http/httputil" @@ -106,54 +105,11 @@ func init() { log.Printf("Loaded %d serverless deployment infos", len(ServerlessDeploymentInfo)) } -func proxyRequest(w http.ResponseWriter, r *http.Request) { - client := &http.Client{} - body, err := ioutil.ReadAll(r.Body) - if err != nil { - http.Error(w, "Failed to read request body", http.StatusInternalServerError) - return - } - - req, err := http.NewRequest("POST", "https://Mistral-large2.swedencentral.models.ai.azure.com/v1/chat/completions", strings.NewReader(string(body))) - if err != nil { - http.Error(w, "Failed to create request", http.StatusInternalServerError) - return - } - - // Forward headers from the original request - for name, values := range r.Header { - for _, value := range values { - req.Header.Add(name, value) - } - } - - handleToken(req, r.URL.Path) - - resp, err := client.Do(req) - if err != nil { - http.Error(w, "Failed to make request", http.StatusInternalServerError) - return - } - defer resp.Body.Close() - - respBody, err := ioutil.ReadAll(resp.Body) - if err != nil { - http.Error(w, "Failed to read response body", http.StatusInternalServerError) - return - 
} - - w.WriteHeader(resp.StatusCode) - _, err = w.Write(respBody) - if err != nil { - http.Error(w, "Failed to write response", http.StatusInternalServerError) - } -} - -func handleToken(req *http.Request, path string) { - deployment := extractDeploymentFromPath(path) +func HandleToken(req *http.Request) { + deployment := extractDeploymentFromPath(req.URL.Path) if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { - req.Header.Set("Authorization", "Bearer "+info.Key) + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) req.Header.Del("api-key") log.Printf("Using serverless deployment authentication for %s", deployment) return @@ -180,16 +136,6 @@ func handleToken(req *http.Request, path string) { } } -func extractDeploymentFromPath(path string) string { - parts := strings.Split(path, "/") - for i, part := range parts { - if part == "deployments" && i+1 < len(parts) { - return parts[i+1] - } - } - return "" -} - func handleModelMapper() { overrideMode := strings.ToLower(os.Getenv("AZURE_OPENAI_MODEL_MAPPER_MODE")) == "override" @@ -249,12 +195,22 @@ func sanitizeHeaders(headers http.Header) http.Header { return sanitized } +func extractDeploymentFromPath(path string) string { + parts := strings.Split(path, "/") + for i, part := range parts { + if part == "deployments" && i+1 < len(parts) { + return parts[i+1] + } + } + return "" +} + func makeDirector(remote *url.URL) func(*http.Request) { return func(req *http.Request) { model := getModelFromRequest(req) deployment := GetDeploymentByModel(model) - handleToken(req, req.URL.Path) + HandleToken(req) originURL := req.URL.String() @@ -262,7 +218,7 @@ func makeDirector(remote *url.URL) func(*http.Request) { req.URL.Scheme = "https" req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", info.Name, info.Region) req.Host = req.URL.Host - // For serverless, keep the original path with '/v1' prefix + // For serverless, keep the original path log.Printf("Using serverless deployment for %s", deployment) } else { req.Host = remote.Host @@ -327,7 +283,6 @@ func modifyResponse(res *http.Response) error { res.Body = io.NopCloser(bytes.NewBuffer(body)) } - // Handle streaming responses if res.Header.Get("Content-Type") == "text/event-stream" { res.Header.Set("X-Accel-Buffering", "no") } From 98cff09591f5369dc90d670fdf6e28dd5d0e36ae Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 19:40:26 +0200 Subject: [PATCH 24/33] chore: Refactor HandleToken function to handle serverless deployments and improve token handling logic by removing strings.tolower from pre forked code. 
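
A minimal sketch of the lookup order this change establishes (exact match
first, then a case-insensitive scan), written as a hypothetical standalone
helper. resolveServerlessKey is illustrative rather than part of the diff; it
assumes the package-level ServerlessDeploymentInfo map and the strings import
already present in this file:

    // resolveServerlessKey resolves a deployment name to its serverless API
    // key: exact map lookup first, case-insensitive scan as a fallback.
    func resolveServerlessKey(deployment string) (string, bool) {
        if info, ok := ServerlessDeploymentInfo[deployment]; ok {
            return info.Key, true
        }
        for name, info := range ServerlessDeploymentInfo {
            if strings.EqualFold(name, deployment) {
                return info.Key, true
            }
        }
        return "", false
    }

The exact match is checked first so a correctly cased deployment name never
pays for the linear scan.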
--- pkg/azure/proxy.go | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index cd2caf5..e0d2716 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -108,15 +108,33 @@ func init() { func HandleToken(req *http.Request) { deployment := extractDeploymentFromPath(req.URL.Path) - if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { - req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) - req.Header.Del("api-key") - log.Printf("Using serverless deployment authentication for %s", deployment) + // First, try an exact match + if info, ok := ServerlessDeploymentInfo[deployment]; ok { + setServerlessAuth(req, info, deployment) return } - var token string + // If no exact match, try case-insensitive match + for key, info := range ServerlessDeploymentInfo { + if strings.EqualFold(key, deployment) { + setServerlessAuth(req, info, deployment) + return + } + } + + // If no serverless match, proceed with regular Azure OpenAI authentication + handleRegularAuth(req, deployment) +} + +func setServerlessAuth(req *http.Request, info ServerlessDeployment, deployment string) { + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) + req.Header.Del("api-key") + log.Printf("Using serverless deployment authentication for %s", deployment) +} +func handleRegularAuth(req *http.Request, deployment string) { + // Existing code for regular Azure OpenAI authentication + var token string if apiKey := req.Header.Get("api-key"); apiKey != "" { token = apiKey } else if authHeader := req.Header.Get("Authorization"); authHeader != "" { From 3e7adf17e9208f09d0c29b16eae930d73f428431 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 20:09:39 +0200 Subject: [PATCH 25/33] fkry --- pkg/azure/proxy.go | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index e0d2716..d8da456 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -50,8 +50,8 @@ var ( "text-embedding-3-large": "text-embedding-3-large-1", } AzureAIStudioDeployments = make(map[string]string) - ServerlessDeploymentInfo = make(map[string]ServerlessDeployment) fallbackModelMapper = regexp.MustCompile(`[.:]`) + ServerlessDeploymentInfo = make(map[string]ServerlessDeployment) ) type ServerlessDeployment struct { @@ -76,18 +76,16 @@ func init() { if len(info) == 2 { deploymentInfo := strings.Split(info[1], ":") if len(deploymentInfo) == 2 { - AzureAIStudioDeployments[info[0]] = deploymentInfo[0] - ServerlessDeploymentInfo[strings.ToLower(info[0])] = ServerlessDeployment{ + ServerlessDeploymentInfo[info[0]] = ServerlessDeployment{ Name: deploymentInfo[0], Region: deploymentInfo[1], Key: os.Getenv("AZURE_OPENAI_KEY_" + strings.ToUpper(info[0])), } } - } else { - log.Printf("error parsing AZURE_AI_STUDIO_DEPLOYMENTS, invalid value %s", pair) } } } + log.Printf("Loaded ServerlessDeploymentInfo: %+v", ServerlessDeploymentInfo) if v := os.Getenv("AZURE_OPENAI_TOKEN"); v != "" { AzureOpenAIToken = v @@ -105,35 +103,34 @@ func init() { log.Printf("Loaded %d serverless deployment infos", len(ServerlessDeploymentInfo)) } -func HandleToken(req *http.Request) { +func HandleToken(req *http.Request) string { deployment := extractDeploymentFromPath(req.URL.Path) // First, try an exact match if info, ok := ServerlessDeploymentInfo[deployment]; ok { - setServerlessAuth(req, info, 
deployment) - return + return setServerlessAuth(req, info, deployment) } // If no exact match, try case-insensitive match for key, info := range ServerlessDeploymentInfo { if strings.EqualFold(key, deployment) { - setServerlessAuth(req, info, deployment) - return + return setServerlessAuth(req, info, deployment) } } // If no serverless match, proceed with regular Azure OpenAI authentication - handleRegularAuth(req, deployment) + return handleRegularAuth(req, deployment) } -func setServerlessAuth(req *http.Request, info ServerlessDeployment, deployment string) { - req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) +func setServerlessAuth(req *http.Request, info ServerlessDeployment, deployment string) string { + token := fmt.Sprintf("Bearer %s", info.Key) + req.Header.Set("Authorization", token) req.Header.Del("api-key") log.Printf("Using serverless deployment authentication for %s", deployment) + return deployment // Return the actual deployment name } -func handleRegularAuth(req *http.Request, deployment string) { - // Existing code for regular Azure OpenAI authentication +func handleRegularAuth(req *http.Request, deployment string) string { var token string if apiKey := req.Header.Get("api-key"); apiKey != "" { token = apiKey @@ -152,6 +149,7 @@ func handleRegularAuth(req *http.Request, deployment string) { } else { log.Printf("Warning: No authentication token found for deployment: %s", deployment) } + return deployment } func handleModelMapper() { @@ -226,13 +224,11 @@ func extractDeploymentFromPath(path string) string { func makeDirector(remote *url.URL) func(*http.Request) { return func(req *http.Request) { model := getModelFromRequest(req) - deployment := GetDeploymentByModel(model) - - HandleToken(req) + deployment := HandleToken(req) // This now returns the actual deployment name originURL := req.URL.String() - if info, ok := ServerlessDeploymentInfo[strings.ToLower(deployment)]; ok { + if info, ok := ServerlessDeploymentInfo[deployment]; ok { req.URL.Scheme = "https" req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", info.Name, info.Region) req.Host = req.URL.Host @@ -290,7 +286,8 @@ func makeDirector(remote *url.URL) func(*http.Request) { req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) } - log.Printf("Proxying request [%s] %s -> %s", model, originURL, req.URL.String()) + log.Printf("Final request URL: %s", req.URL.String()) + log.Printf("Final request headers: %v", req.Header) } } From b2b726eb65ebdb495b9193d1c1e26fa30a0852d7 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 20:14:12 +0200 Subject: [PATCH 26/33] Refactor proxyRequest function to handle response writing and error handling --- pkg/azure/proxy.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index d8da456..78a2b83 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -227,6 +227,7 @@ func makeDirector(remote *url.URL) func(*http.Request) { deployment := HandleToken(req) // This now returns the actual deployment name originURL := req.URL.String() + log.Printf("Original request URL: %s for model: %s", originURL, model) if info, ok := ServerlessDeploymentInfo[deployment]; ok { req.URL.Scheme = "https" @@ -286,8 +287,8 @@ func makeDirector(remote *url.URL) func(*http.Request) { req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) } - log.Printf("Final request URL: %s", req.URL.String()) - log.Printf("Final request headers: %v", req.Header) + 
log.Printf("Proxying request [%s] %s -> %s", model, originURL, req.URL.String()) + log.Printf("Final request headers: %v", sanitizeHeaders(req.Header)) } } From cc51b447d8cdf079df4ca4398eb0a1ba5ec6d796 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 20:26:18 +0200 Subject: [PATCH 27/33] fkery --- pkg/azure/proxy.go | 69 ++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 78a2b83..4c0639d 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -103,25 +103,6 @@ func init() { log.Printf("Loaded %d serverless deployment infos", len(ServerlessDeploymentInfo)) } -func HandleToken(req *http.Request) string { - deployment := extractDeploymentFromPath(req.URL.Path) - - // First, try an exact match - if info, ok := ServerlessDeploymentInfo[deployment]; ok { - return setServerlessAuth(req, info, deployment) - } - - // If no exact match, try case-insensitive match - for key, info := range ServerlessDeploymentInfo { - if strings.EqualFold(key, deployment) { - return setServerlessAuth(req, info, deployment) - } - } - - // If no serverless match, proceed with regular Azure OpenAI authentication - return handleRegularAuth(req, deployment) -} - func setServerlessAuth(req *http.Request, info ServerlessDeployment, deployment string) string { token := fmt.Sprintf("Bearer %s", info.Key) req.Header.Set("Authorization", token) @@ -221,10 +202,43 @@ func extractDeploymentFromPath(path string) string { return "" } +func HandleToken(req *http.Request) string { + deployment := extractDeploymentFromPath(req.URL.Path) + + // First, try an exact match for serverless deployment + if info, ok := ServerlessDeploymentInfo[deployment]; ok { + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) + req.Header.Del("api-key") + log.Printf("Using serverless deployment authentication for %s", deployment) + return deployment + } + + // If no serverless match, proceed with regular Azure OpenAI authentication + var token string + if apiKey := req.Header.Get("api-key"); apiKey != "" { + token = apiKey + } else if authHeader := req.Header.Get("Authorization"); authHeader != "" { + token = strings.TrimPrefix(authHeader, "Bearer ") + } else if AzureOpenAIToken != "" { + token = AzureOpenAIToken + } else if envApiKey := os.Getenv("AZURE_OPENAI_API_KEY"); envApiKey != "" { + token = envApiKey + } + + if token != "" { + req.Header.Set("api-key", token) + req.Header.Del("Authorization") + log.Printf("Using regular Azure OpenAI authentication for %s", deployment) + } else { + log.Printf("Warning: No authentication token found for deployment: %s", deployment) + } + return deployment +} + func makeDirector(remote *url.URL) func(*http.Request) { return func(req *http.Request) { model := getModelFromRequest(req) - deployment := HandleToken(req) // This now returns the actual deployment name + deployment := HandleToken(req) originURL := req.URL.String() log.Printf("Original request URL: %s for model: %s", originURL, model) @@ -236,9 +250,9 @@ func makeDirector(remote *url.URL) func(*http.Request) { // For serverless, keep the original path log.Printf("Using serverless deployment for %s", deployment) } else { - req.Host = remote.Host req.URL.Scheme = remote.Scheme req.URL.Host = remote.Host + req.Host = remote.Host // For regular Azure OpenAI, construct the path switch { @@ -248,18 +262,7 @@ func makeDirector(remote *url.URL) func(*http.Request) { 
req.URL.Path = path.Join("/openai/deployments", deployment, "completions") case strings.HasPrefix(req.URL.Path, "/v1/embeddings"): req.URL.Path = path.Join("/openai/deployments", deployment, "embeddings") - case strings.HasPrefix(req.URL.Path, "/v1/images/generations"): - req.URL.Path = path.Join("/openai/deployments", deployment, "images/generations") - case strings.HasPrefix(req.URL.Path, "/v1/fine_tunes"): - req.URL.Path = path.Join("/openai/deployments", deployment, "fine-tunes") - case strings.HasPrefix(req.URL.Path, "/v1/files"): - req.URL.Path = path.Join("/openai/deployments", deployment, "files") - case strings.HasPrefix(req.URL.Path, "/v1/audio/speech"): - req.URL.Path = path.Join("/openai/deployments", deployment, "audio/speech") - case strings.HasPrefix(req.URL.Path, "/v1/audio/transcriptions"): - req.URL.Path = path.Join("/openai/deployments", deployment, "transcriptions") - case strings.HasPrefix(req.URL.Path, "/v1/audio/translations"): - req.URL.Path = path.Join("/openai/deployments", deployment, "translations") + // ... (keep other cases) default: req.URL.Path = path.Join("/openai/deployments", deployment, strings.TrimPrefix(req.URL.Path, "/v1/")) } From 8122f0a8feb56c1c3ac89115d6d3a2c394c9da81 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 20:42:55 +0200 Subject: [PATCH 28/33] fuck model mapper temp --- pkg/azure/proxy.go | 284 +++++++++++---------------------------------- 1 file changed, 66 insertions(+), 218 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 4c0639d..b10ec3d 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -2,7 +2,6 @@ package azure import ( "bytes" - "encoding/json" "fmt" "io" "log" @@ -11,46 +10,14 @@ import ( "net/url" "os" "path" - "regexp" "strings" "github.com/tidwall/gjson" ) var ( - AzureOpenAIToken = "" - AzureOpenAIAPIVersion = "2024-06-01" - AzureOpenAIEndpoint = "" - AzureOpenAIModelMapper = map[string]string{ - "gpt-3.5-turbo": "gpt-35-turbo", - "gpt-3.5-turbo-0125": "gpt-35-turbo-0125", - "gpt-3.5-turbo-0613": "gpt-35-turbo-0613", - "gpt-3.5-turbo-1106": "gpt-35-turbo-1106", - "gpt-3.5-turbo-16k-0613": "gpt-35-turbo-16k-0613", - "gpt-3.5-turbo-instruct-0914": "gpt-35-turbo-instruct-0914", - "gpt-4": "gpt-4-0613", - "gpt-4-32k": "gpt-4-32k", - "gpt-4-32k-0613": "gpt-4-32k-0613", - "gpt-4o": "gpt-4o", - "gpt-4o-mini": "gpt-4o-mini", - "gpt-4o-2024-05-13": "gpt-4o-2024-05-13", - "gpt-4-turbo": "gpt-4-turbo", - "gpt-4-vision-preview": "gpt-4-vision-preview", - "gpt-4-turbo-2024-04-09": "gpt-4-turbo-2024-04-09", - "gpt-4-1106-preview": "gpt-4-1106-preview", - "text-embedding-ada-002": "text-embedding-ada-002", - "dall-e-2": "dall-e-2", - "dall-e-3": "dall-e-3", - "babbage-002": "babbage-002", - "davinci-002": "davinci-002", - "whisper-1": "whisper", - "tts-1": "tts", - "tts-1-hd": "tts-hd", - "text-embedding-3-small": "text-embedding-3-small-1", - "text-embedding-3-large": "text-embedding-3-large-1", - } - AzureAIStudioDeployments = make(map[string]string) - fallbackModelMapper = regexp.MustCompile(`[.:]`) + AzureOpenAIAPIVersion = "2024-06-01" + AzureOpenAIEndpoint = "" ServerlessDeploymentInfo = make(map[string]ServerlessDeployment) ) @@ -68,8 +35,6 @@ func init() { AzureOpenAIEndpoint = v } - handleModelMapper() - if v := os.Getenv("AZURE_AI_STUDIO_DEPLOYMENTS"); v != "" { for _, pair := range strings.Split(v, ",") { info := strings.Split(pair, "=") @@ -86,89 +51,82 @@ func init() { } } log.Printf("Loaded ServerlessDeploymentInfo: %+v", 
ServerlessDeploymentInfo) + log.Printf("Azure OpenAI Endpoint: %s", AzureOpenAIEndpoint) + log.Printf("Azure OpenAI API Version: %s", AzureOpenAIAPIVersion) +} - if v := os.Getenv("AZURE_OPENAI_TOKEN"); v != "" { - AzureOpenAIToken = v - log.Printf("loading azure api token from env") - } - - log.Printf("loading azure api endpoint: %s", AzureOpenAIEndpoint) - log.Printf("loading azure api version: %s", AzureOpenAIAPIVersion) - for k, v := range AzureOpenAIModelMapper { - log.Printf("final azure model mapper: %s -> %s", k, v) - } - for k, v := range AzureAIStudioDeployments { - log.Printf("loading azure ai studio deployment: %s -> %s", k, v) +func NewOpenAIReverseProxy() *httputil.ReverseProxy { + return &httputil.ReverseProxy{ + Director: makeDirector(), + ModifyResponse: modifyResponse, } - log.Printf("Loaded %d serverless deployment infos", len(ServerlessDeploymentInfo)) } -func setServerlessAuth(req *http.Request, info ServerlessDeployment, deployment string) string { - token := fmt.Sprintf("Bearer %s", info.Key) - req.Header.Set("Authorization", token) - req.Header.Del("api-key") - log.Printf("Using serverless deployment authentication for %s", deployment) - return deployment // Return the actual deployment name -} +func makeDirector() func(*http.Request) { + return func(req *http.Request) { + model := getModelFromRequest(req) + originURL := req.URL.String() + log.Printf("Original request URL: %s for model: %s", originURL, model) -func handleRegularAuth(req *http.Request, deployment string) string { - var token string - if apiKey := req.Header.Get("api-key"); apiKey != "" { - token = apiKey - } else if authHeader := req.Header.Get("Authorization"); authHeader != "" { - token = strings.TrimPrefix(authHeader, "Bearer ") - } else if AzureOpenAIToken != "" { - token = AzureOpenAIToken - } else if envApiKey := os.Getenv("AZURE_OPENAI_API_KEY"); envApiKey != "" { - token = envApiKey - } + // Check if it's a serverless deployment + if info, ok := ServerlessDeploymentInfo[model]; ok { + handleServerlessRequest(req, info, model) + } else { + handleRegularRequest(req, model) + } - if token != "" { - req.Header.Set("api-key", token) - req.Header.Del("Authorization") - log.Printf("Using regular Azure OpenAI authentication for %s", deployment) - } else { - log.Printf("Warning: No authentication token found for deployment: %s", deployment) + log.Printf("Proxying request [%s] %s -> %s", model, originURL, req.URL.String()) + log.Printf("Final request headers: %v", sanitizeHeaders(req.Header)) } - return deployment } -func handleModelMapper() { - overrideMode := strings.ToLower(os.Getenv("AZURE_OPENAI_MODEL_MAPPER_MODE")) == "override" +func handleServerlessRequest(req *http.Request, info ServerlessDeployment, model string) { + req.URL.Scheme = "https" + req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", info.Name, info.Region) + req.Host = req.URL.Host - if v := os.Getenv("AZURE_OPENAI_MODEL_MAPPER"); v != "" { - for _, pair := range strings.Split(v, ",") { - info := strings.Split(pair, "=") - if len(info) == 2 { - if overrideMode { - AzureOpenAIModelMapper[info[0]] = info[1] - log.Printf("Overriding model mapping: %s -> %s", info[0], info[1]) - } else { - if _, exists := AzureOpenAIModelMapper[info[0]]; !exists { - AzureOpenAIModelMapper[info[0]] = info[1] - log.Printf("Adding new model mapping: %s -> %s", info[0], info[1]) - } else { - log.Printf("Skipping existing model mapping: %s", info[0]) - } - } - } else { - log.Printf("error parsing AZURE_OPENAI_MODEL_MAPPER, invalid value %s", pair) - } 
- } - } -} + // Keep the original path for serverless deployments + // req.URL.Path remains unchanged -func NewOpenAIReverseProxy() *httputil.ReverseProxy { - remote, err := url.Parse(AzureOpenAIEndpoint) - if err != nil { - log.Printf("error parse endpoint: %s\n", AzureOpenAIEndpoint) - os.Exit(1) - } + // Set the correct authorization header for serverless + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) + req.Header.Del("api-key") - return &httputil.ReverseProxy{ - Director: makeDirector(remote), - ModifyResponse: modifyResponse, - } + log.Printf("Using serverless deployment for %s", model) +} + +func handleRegularRequest(req *http.Request, model string) { + remote, _ := url.Parse(AzureOpenAIEndpoint) + req.URL.Scheme = remote.Scheme + req.URL.Host = remote.Host + req.Host = remote.Host + + // Construct the path for regular Azure OpenAI deployments + deployment := model // Use the model as the deployment name for regular deployments + switch { + case strings.HasPrefix(req.URL.Path, "/v1/chat/completions"): + req.URL.Path = path.Join("/openai/deployments", deployment, "chat/completions") + case strings.HasPrefix(req.URL.Path, "/v1/completions"): + req.URL.Path = path.Join("/openai/deployments", deployment, "completions") + case strings.HasPrefix(req.URL.Path, "/v1/embeddings"): + req.URL.Path = path.Join("/openai/deployments", deployment, "embeddings") + // Add other cases as needed + default: + req.URL.Path = path.Join("/openai/deployments", deployment, strings.TrimPrefix(req.URL.Path, "/v1/")) + } + + // Add api-version query parameter + query := req.URL.Query() + query.Add("api-version", AzureOpenAIAPIVersion) + req.URL.RawQuery = query.Encode() + + // Use the api-key from the original request for regular deployments + apiKey := req.Header.Get("api-key") + if apiKey == "" { + log.Printf("Warning: No api-key found for regular deployment: %s", model) + } + + log.Printf("Using regular Azure OpenAI deployment for %s", model) } func getModelFromRequest(req *http.Request) string { @@ -192,109 +150,6 @@ func sanitizeHeaders(headers http.Header) http.Header { return sanitized } -func extractDeploymentFromPath(path string) string { - parts := strings.Split(path, "/") - for i, part := range parts { - if part == "deployments" && i+1 < len(parts) { - return parts[i+1] - } - } - return "" -} - -func HandleToken(req *http.Request) string { - deployment := extractDeploymentFromPath(req.URL.Path) - - // First, try an exact match for serverless deployment - if info, ok := ServerlessDeploymentInfo[deployment]; ok { - req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) - req.Header.Del("api-key") - log.Printf("Using serverless deployment authentication for %s", deployment) - return deployment - } - - // If no serverless match, proceed with regular Azure OpenAI authentication - var token string - if apiKey := req.Header.Get("api-key"); apiKey != "" { - token = apiKey - } else if authHeader := req.Header.Get("Authorization"); authHeader != "" { - token = strings.TrimPrefix(authHeader, "Bearer ") - } else if AzureOpenAIToken != "" { - token = AzureOpenAIToken - } else if envApiKey := os.Getenv("AZURE_OPENAI_API_KEY"); envApiKey != "" { - token = envApiKey - } - - if token != "" { - req.Header.Set("api-key", token) - req.Header.Del("Authorization") - log.Printf("Using regular Azure OpenAI authentication for %s", deployment) - } else { - log.Printf("Warning: No authentication token found for deployment: %s", deployment) - } - return deployment -} - -func 
makeDirector(remote *url.URL) func(*http.Request) { - return func(req *http.Request) { - model := getModelFromRequest(req) - deployment := HandleToken(req) - - originURL := req.URL.String() - log.Printf("Original request URL: %s for model: %s", originURL, model) - - if info, ok := ServerlessDeploymentInfo[deployment]; ok { - req.URL.Scheme = "https" - req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", info.Name, info.Region) - req.Host = req.URL.Host - // For serverless, keep the original path - log.Printf("Using serverless deployment for %s", deployment) - } else { - req.URL.Scheme = remote.Scheme - req.URL.Host = remote.Host - req.Host = remote.Host - - // For regular Azure OpenAI, construct the path - switch { - case strings.HasPrefix(req.URL.Path, "/v1/chat/completions"): - req.URL.Path = path.Join("/openai/deployments", deployment, "chat/completions") - case strings.HasPrefix(req.URL.Path, "/v1/completions"): - req.URL.Path = path.Join("/openai/deployments", deployment, "completions") - case strings.HasPrefix(req.URL.Path, "/v1/embeddings"): - req.URL.Path = path.Join("/openai/deployments", deployment, "embeddings") - // ... (keep other cases) - default: - req.URL.Path = path.Join("/openai/deployments", deployment, strings.TrimPrefix(req.URL.Path, "/v1/")) - } - - // Only add api-version for non-serverless deployments - query := req.URL.Query() - query.Add("api-version", AzureOpenAIAPIVersion) - req.URL.RawQuery = query.Encode() - } - - req.URL.RawPath = req.URL.EscapedPath() - - if req.Body != nil { - var requestBody map[string]interface{} - bodyBytes, _ := io.ReadAll(req.Body) - json.Unmarshal(bodyBytes, &requestBody) - - newParams := []string{"completion_config", "presence_penalty", "frequency_penalty", "best_of"} - for _, param := range newParams { - if val, ok := requestBody[param]; ok { - log.Printf("Request includes %s parameter: %v", param, val) - } - } - - req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) - } - - log.Printf("Proxying request [%s] %s -> %s", model, originURL, req.URL.String()) - log.Printf("Final request headers: %v", sanitizeHeaders(req.Header)) - } -} - func modifyResponse(res *http.Response) error { if res.StatusCode >= 400 { body, _ := io.ReadAll(res.Body) @@ -308,10 +163,3 @@ func modifyResponse(res *http.Response) error { return nil } - -func GetDeploymentByModel(model string) string { - if v, ok := AzureOpenAIModelMapper[model]; ok { - return v - } - return fallbackModelMapper.ReplaceAllString(model, "") -} From cb00f3af43b059b1d87e6158cfb14ad4a1df0d56 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 20:45:47 +0200 Subject: [PATCH 29/33] f u c k e r y --- pkg/azure/proxy.go | 82 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 69 insertions(+), 13 deletions(-) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index b10ec3d..96ed7c2 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -19,6 +19,7 @@ var ( AzureOpenAIAPIVersion = "2024-06-01" AzureOpenAIEndpoint = "" ServerlessDeploymentInfo = make(map[string]ServerlessDeployment) + AzureOpenAIModelMapper = make(map[string]string) ) type ServerlessDeployment struct { @@ -41,7 +42,7 @@ func init() { if len(info) == 2 { deploymentInfo := strings.Split(info[1], ":") if len(deploymentInfo) == 2 { - ServerlessDeploymentInfo[info[0]] = ServerlessDeployment{ + ServerlessDeploymentInfo[strings.ToLower(info[0])] = ServerlessDeployment{ Name: deploymentInfo[0], Region: deploymentInfo[1], Key: 
os.Getenv("AZURE_OPENAI_KEY_" + strings.ToUpper(info[0])), @@ -50,6 +51,37 @@ func init() { } } } + + // Initialize AzureOpenAIModelMapper (you might want to load this from an environment variable or config file) + AzureOpenAIModelMapper = map[string]string{ + "gpt-3.5-turbo": "gpt-35-turbo", + "gpt-3.5-turbo-0125": "gpt-35-turbo-0125", + "gpt-3.5-turbo-0613": "gpt-35-turbo-0613", + "gpt-3.5-turbo-1106": "gpt-35-turbo-1106", + "gpt-3.5-turbo-16k-0613": "gpt-35-turbo-16k-0613", + "gpt-3.5-turbo-instruct-0914": "gpt-35-turbo-instruct-0914", + "gpt-4": "gpt-4-0613", + "gpt-4-32k": "gpt-4-32k", + "gpt-4-32k-0613": "gpt-4-32k-0613", + "gpt-4o": "gpt-4o", + "gpt-4o-mini": "gpt-4o-mini", + "gpt-4o-2024-05-13": "gpt-4o-2024-05-13", + "gpt-4-turbo": "gpt-4-turbo", + "gpt-4-vision-preview": "gpt-4-vision-preview", + "gpt-4-turbo-2024-04-09": "gpt-4-turbo-2024-04-09", + "gpt-4-1106-preview": "gpt-4-1106-preview", + "text-embedding-ada-002": "text-embedding-ada-002", + "dall-e-2": "dall-e-2", + "dall-e-3": "dall-e-3", + "babbage-002": "babbage-002", + "davinci-002": "davinci-002", + "whisper-1": "whisper", + "tts-1": "tts", + "tts-1-hd": "tts-hd", + "text-embedding-3-small": "text-embedding-3-small-1", + "text-embedding-3-large": "text-embedding-3-large-1", + } + log.Printf("Loaded ServerlessDeploymentInfo: %+v", ServerlessDeploymentInfo) log.Printf("Azure OpenAI Endpoint: %s", AzureOpenAIEndpoint) log.Printf("Azure OpenAI API Version: %s", AzureOpenAIAPIVersion) @@ -68,10 +100,16 @@ func makeDirector() func(*http.Request) { originURL := req.URL.String() log.Printf("Original request URL: %s for model: %s", originURL, model) + // Convert model to lowercase for case-insensitive matching + modelLower := strings.ToLower(model) + // Check if it's a serverless deployment - if info, ok := ServerlessDeploymentInfo[model]; ok { + if info, ok := ServerlessDeploymentInfo[modelLower]; ok { handleServerlessRequest(req, info, model) + } else if azureModel, ok := AzureOpenAIModelMapper[modelLower]; ok { + handleRegularRequest(req, azureModel) } else { + log.Printf("Warning: Unknown model %s, treating as regular Azure OpenAI deployment", model) handleRegularRequest(req, model) } @@ -85,8 +123,13 @@ func handleServerlessRequest(req *http.Request, info ServerlessDeployment, model req.URL.Host = fmt.Sprintf("%s.%s.models.ai.azure.com", info.Name, info.Region) req.Host = req.URL.Host - // Keep the original path for serverless deployments - // req.URL.Path remains unchanged + // Preserve query parameters from the original request + originalQuery := req.URL.Query() + for key, values := range originalQuery { + for _, value := range values { + req.URL.Query().Add(key, value) + } + } // Set the correct authorization header for serverless req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) @@ -95,14 +138,13 @@ func handleServerlessRequest(req *http.Request, info ServerlessDeployment, model log.Printf("Using serverless deployment for %s", model) } -func handleRegularRequest(req *http.Request, model string) { +func handleRegularRequest(req *http.Request, deployment string) { remote, _ := url.Parse(AzureOpenAIEndpoint) req.URL.Scheme = remote.Scheme req.URL.Host = remote.Host req.Host = remote.Host // Construct the path for regular Azure OpenAI deployments - deployment := model // Use the model as the deployment name for regular deployments switch { case strings.HasPrefix(req.URL.Path, "/v1/chat/completions"): req.URL.Path = path.Join("/openai/deployments", deployment, "chat/completions") @@ -123,19 +165,33 @@ 
func handleRegularRequest(req *http.Request, model string) { // Use the api-key from the original request for regular deployments apiKey := req.Header.Get("api-key") if apiKey == "" { - log.Printf("Warning: No api-key found for regular deployment: %s", model) + log.Printf("Warning: No api-key found for regular deployment: %s", deployment) } - log.Printf("Using regular Azure OpenAI deployment for %s", model) + log.Printf("Using regular Azure OpenAI deployment for %s", deployment) } func getModelFromRequest(req *http.Request) string { - if req.Body == nil { - return "" + // First, try to get the model from the URL path + parts := strings.Split(req.URL.Path, "/") + for i, part := range parts { + if part == "deployments" && i+1 < len(parts) { + return parts[i+1] + } } - body, _ := io.ReadAll(req.Body) - req.Body = io.NopCloser(bytes.NewBuffer(body)) - return gjson.GetBytes(body, "model").String() + + // If not found in the path, try to get it from the request body + if req.Body != nil { + body, _ := io.ReadAll(req.Body) + req.Body = io.NopCloser(bytes.NewBuffer(body)) + model := gjson.GetBytes(body, "model").String() + if model != "" { + return model + } + } + + // If still not found, return an empty string + return "" } func sanitizeHeaders(headers http.Header) http.Header { From 00b971d4c3cb1dc8e6aa6f92180c40972ed55c25 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 20:58:18 +0200 Subject: [PATCH 30/33] FU C K R Y --- pkg/azure/proxy.go | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go index 96ed7c2..378caeb 100644 --- a/pkg/azure/proxy.go +++ b/pkg/azure/proxy.go @@ -94,12 +94,44 @@ func NewOpenAIReverseProxy() *httputil.ReverseProxy { } } +func HandleToken(req *http.Request) { + model := getModelFromRequest(req) + modelLower := strings.ToLower(model) + + // Check if it's a serverless deployment + if info, ok := ServerlessDeploymentInfo[modelLower]; ok { + // Set the correct authorization header for serverless + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.Key)) + req.Header.Del("api-key") + log.Printf("Using serverless deployment authentication for %s", model) + } else { + // For regular Azure OpenAI deployments, use the api-key + apiKey := req.Header.Get("api-key") + if apiKey == "" { + apiKey = req.Header.Get("Authorization") + if strings.HasPrefix(apiKey, "Bearer ") { + apiKey = strings.TrimPrefix(apiKey, "Bearer ") + } + } + if apiKey == "" { + log.Printf("Warning: No api-key or Authorization header found for deployment: %s", model) + } else { + req.Header.Set("api-key", apiKey) + req.Header.Del("Authorization") + log.Printf("Using regular Azure OpenAI authentication for %s", model) + } + } +} + func makeDirector() func(*http.Request) { return func(req *http.Request) { model := getModelFromRequest(req) originURL := req.URL.String() log.Printf("Original request URL: %s for model: %s", originURL, model) + // Handle the token + HandleToken(req) + // Convert model to lowercase for case-insensitive matching modelLower := strings.ToLower(model) From 679a07efb1a4e6cebb2535c1b4e97cac19d6d072 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Thu, 25 Jul 2024 22:50:16 +0200 Subject: [PATCH 31/33] feat: Add Azure OAI proxy service configuration, docker compose example and updated readme --- .github/workflows/compose.yaml | 21 ++++ README.md | 208 ++++++++++++++++++--------------- 2 files 
changed, 133 insertions(+), 96 deletions(-) create mode 100644 .github/workflows/compose.yaml diff --git a/.github/workflows/compose.yaml b/.github/workflows/compose.yaml new file mode 100644 index 0000000..67baa9b --- /dev/null +++ b/.github/workflows/compose.yaml @@ -0,0 +1,21 @@ +services: + azure-oai-proxy: + # env_file: .env + image: 'gyarbij/azure-oai-proxy:latest' + # container_name: azure-oai-proxy + # Alternatively, use GitHub Container Registry: + # image: 'ghcr.io/gyarbij/azure-oai-proxy:latest' + restart: always + environment: + - AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/ + # - AZURE_OPENAI_PROXY_ADDRESS=0.0.0.0:11437 + # - AZURE_OPENAI_PROXY_MODE=azure + # - AZURE_OPENAI_APIVERSION=2024-06-01 + # - AZURE_OPENAI_MODEL_MAPPER=gpt-3.5-turbo=gpt-35-turbo,gpt-4=gpt-4-turbo + # - AZURE_AI_STUDIO_DEPLOYMENTS=mistral-large-2407=Mistral-large2:swedencentral,llama-3.1-405B=Meta-Llama-3-1-405B-Instruct:northcentralus,llama-3.1-70B=Llama-31-70B:swedencentral + # - AZURE_OPENAI_KEY_MISTRAL-LARGE-2407=your-api-key-1 + # - AZURE_OPENAI_KEY_LLAMA-3.1-8B=your-api-key-2 + # - AZURE_OPENAI_KEY_LLAMA-3.1-70B=your-api-key-3 + ports: + - '11437:11437' + # Uncomment the following line to use an .env file: \ No newline at end of file diff --git a/README.md b/README.md index 257dbe4..19b1bac 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,24 @@ # Azure OpenAI Proxy [![Go Report Card](https://goreportcard.com/badge/github.com/Gyarbij/azure-oai-proxy)](https://goreportcard.com/report/github.com/Gyarbij/azure-oai-proxy) -[![License](https://badgen.net/badge/license/MIT/cyan)](https://github.com/gyarbij/azure-oai-proxy/blob/main/LICENSE) -[![Release](https://badgen.net/github/release/gyarbij/azure-oai-proxy/latest)](https://github.com/gyarbij/azure-oai-proxy) -[![Azure](https://badgen.net/badge/icon/Azure?icon=azure&label)](https://github.com/gyarbij/azure-oai-proxy) -[![Azure](https://badgen.net/badge/icon/OpenAI?icon=azure&label)](https://github.com/gyarbij/azure-oai-proxy) -[![Azure](https://badgen.net/badge/icon/docker?icon=docker&label)](https://github.com/gyarbij/azure-oai-proxy) +[![Main v Dev Commits](https://shields.git.vg/github/commits-difference/Gyarbij/azure-oai-proxy?base=main&head=dev)](https://github.com/gyarbij/azure-oai-proxy) +[![Taal](https://shields.git.vg/github/languages/top/Gyarbij/azure-oai-proxy)](https://github.com/gyarbij/azure-oai-proxy) +[![GHCR Build](https://shields.git.vg/github/actions/workflow/status/gyarbij/azure-oai-proxy/ghcr-docker-publish.yml)](https://github.com/gyarbij/azure-oai-proxy) +[![License](https://shields.git.vg/github/license/Gyarbij/azure-oai-proxy?style=for-the-badge&color=blue)](https://github.com/gyarbij/azure-oai-proxy/blob/main/LICENSE) ## Introduction -Azure OAI Proxy is a lightweight, high-performance proxy server that enables seamless integration between Azure OpenAI Services and applications designed for only OpenAI API compatible endpoints. This project bridges the gap for tools and services that are built to work with OpenAI's API structure but need to utilize Azure's OpenAI. +Azure OAI Proxy is a lightweight, high-performance proxy server that enables seamless integration between Azure OpenAI Services and applications designed for OpenAI API only compatible endpoints. This project bridges the gap for tools and services that are built to work with OpenAI's API structure but need to utilize Azure's OpenAI. 
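In practice the translation is a host, path, and auth-header rewrite. As a rough sketch (the endpoint and deployment names below are placeholders, and the api-version shown is the proxy's current default):

```text
POST /v1/chat/completions            body: {"model": "gpt-4o", ...}
  -->  POST https://{your-endpoint}.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-06-01
```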
## Key Features
 
- βœ… **API Compatibility**: Translates requests from OpenAI API format to Azure OpenAI Services format on-the-fly.
- πŸ—ΊοΈ **Model Mapping**: Automatically maps OpenAI model names to the Azure scheme.
-- πŸ”„ **Dynamic Model List**: Fetches available models directly from your Azure OpenAI deployment to have feature parity with normal OpenAI, in projects such as Open WebUI.
+- πŸ”„ **Dynamic Model List**: Fetches available models directly from your Azure OpenAI deployment.
- 🌐 **Support for Multiple Endpoints**: Handles various API endpoints including image, speech, completions, chat completions, embeddings, and more.
- 🚦 **Error Handling**: Provides meaningful error messages and logging for easier debugging.
-- βš™οΈ **Configurable**: Easy to set up with environment variables for Azure OpenAI endpoint and API key.
+- βš™οΈ **Configurable**: Easy to set up with environment variables for Azure AI/Azure OAI endpoint and API keys.
+- πŸ” **Serverless Deployment Support**: Supports Azure AI serverless deployments with custom authentication.
 
 ## Use Cases
 
@@ -45,119 +45,127 @@ Also, I strongly recommend using TLS/SSL for secure communication between the pr
 ## Supported APIs
 
-The latest version of the Azure OpenAI service now supports the following APIs:
+The latest version of the Azure OpenAI service supports the following APIs:
+
+| Path | Status |
+|--------------------------------|--------|
+| /v1/chat/completions | βœ… |
+| /v1/completions | βœ… |
+| /v1/embeddings | βœ… |
+| /v1/images/generations | βœ… |
+| /v1/fine_tunes | βœ… |
+| /v1/files | βœ… |
+| /v1/models | βœ… |
+| /deployments | βœ… |
+| /v1/audio/speech | βœ… |
+| /v1/audio/transcriptions | βœ… |
+| /v1/audio/translations | βœ… |
+| /v1/models/:model_id/capabilities | βœ… |
 
-| Path | Status |
-| --------------------- | ------ |
-| /v1/chat/completions | βœ… |
-| /v1/completions | βœ… |
-| /v1/embeddings | βœ… |
-| /v1/images/generations | βœ… |
-| /v1/fine_tunes | βœ… |
-| /v1/files | βœ… |
-| /v1/models | βœ… |
-| /deployments | βœ… |
-| /v1/audio | βœ… |
-
-> Other APIs not supported by Azure will be returned in a mock format (such as OPTIONS requests initiated by browsers). If you find your project needs additional OpenAI-supported APIs, feel free to submit a PR.
-
-## Getting Started
-
-It's easy to get started with Azure OAI Proxy. You can either deploy it as a reverse proxy or use it as a forward proxy as detailed below. 
However if you're ready to jump right in and start using the proxy, you can use the following Docker command: - -```docker -docker pull gyarbij/azure-oai-proxy:latest +## Configuration -docker run -d -p 11437:11437 --name=azure-oai-proxy \ - --env AZURE_OPENAI_ENDPOINT=https://{YOURENDPOINT}.openai.azure.com \ - gyarbij/azure-oai-proxy:latest +### Environment Variables + +| Parameter | Description | Default Value | Required | +|-------------------------------|----------------------------------------------------------------------------|----------------------|----------| +| AZURE_OPENAI_ENDPOINT | Azure OpenAI Endpoint | | Yes | +| AZURE_OPENAI_PROXY_ADDRESS | Service listening address | 0.0.0.0:11437 | No | +| AZURE_OPENAI_PROXY_MODE | Proxy mode, can be either "azure" or "openai" | azure | No | +| AZURE_OPENAI_APIVERSION | Azure OpenAI API version | 2024-06-01 | No | +| AZURE_OPENAI_MODEL_MAPPER | Comma-separated list of model=deployment pairs | | No | +| AZURE_AI_STUDIO_DEPLOYMENTS | Comma-separated list of serverless deployments | | No | +| AZURE_OPENAI_KEY_* | API keys for serverless deployments (replace * with uppercase model name) | | No | + +## Usage + +### Docker Compose + +Here's an example `docker-compose.yml` file with all possible environment variable options: + +```yaml +services: + azure-oai-proxy: + image: 'gyarbij/azure-oai-proxy:latest' + # container_name: azure-oai-proxy + # Alternatively, use GitHub Container Registry: + # image: 'ghcr.io/gyarbij/azure-oai-proxy:latest' + restart: always + environment: + - AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/ + # - AZURE_OPENAI_PROXY_ADDRESS=0.0.0.0:11437 + # - AZURE_OPENAI_PROXY_MODE=azure + # - AZURE_OPENAI_APIVERSION=2024-06-01 + # - AZURE_OPENAI_MODEL_MAPPER=gpt-3.5-turbo=gpt-35-turbo,gpt-4=gpt-4-turbo + # - AZURE_AI_STUDIO_DEPLOYMENTS=mistral-large-2407=Mistral-large2:swedencentral,llama-3.1-405B=Meta-Llama-3-1-405B-Instruct:northcentralus,llama-3.1-70B=Llama-31-70B:swedencentral + # - AZURE_OPENAI_KEY_MISTRAL-LARGE-2407=your-api-key-1 + # - AZURE_OPENAI_KEY_LLAMA-3.1-8B=your-api-key-2 + # - AZURE_OPENAI_KEY_LLAMA-3.1-70B=your-api-key-3 + ports: + - '11437:11437' + # Uncomment the following line to use an .env file: + # env_file: .env ``` -## Configuration +To use this configuration: -### 1. Used as reverse proxy (i.e. an OpenAI API gateway) +1. Save the above content in a file named `compose.yaml`. +2. Replace the placeholder values (e.g., `your-endpoint`, `your-api-key-1`, etc.) with your actual Azure OpenAI configuration. +3. Run the following command in the same directory as your `compose.yaml` file: -Environment Variables +```sh +docker compose up -d +``` -Here's the updated markdown table including a column for required: +### Using an .env File -| Parameters | Description | Default Value | Required | -| :------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------------------------------------------------------------------- | :------- | -| AZURE_OPENAI_PROXY_ADDRESS | Service listening address | 0.0.0.0:11437 | No | -| AZURE_OPENAI_PROXY_MODE | Proxy mode, can be either "azure" or "openai". 
| azure | No | -| AZURE_OPENAI_ENDPOINT | Azure OpenAI Endpoint, usually looks like https://{YOURDEPLOYMENT}.openai.azure.com. | | Yes | -| AZURE_OPENAI_APIVERSION | Azure OpenAI API version. Default is 2024-05-01-preview. | 2024-05-01-preview | No | -| AZURE_OPENAI_MODEL_MAPPER (Use for custom deployment names) | A comma-separated list of model=deployment pairs. Maps model names to deployment names. For example, `gpt-3.5-turbo=gpt-35-turbo`, `gpt-3.5-turbo-0301=gpt-35-turbo-0301`. If there is no match, the proxy will pass model as deployment name directly (most Azure model names are the same as OpenAI). | "" | No | -| AZURE_OPENAI_TOKEN | Azure OpenAI API Token. If this environment variable is set, the token in the request header will be ignored. | "" | No | +To use an .env file instead of environment variables in the Docker Compose file: -Use in command line +1. Create a file named `.env` in the same directory as your `docker-compose.yml`. +2. Add your environment variables to the `.env` file, one per line: -```shell -curl https://{your-custom-domain}/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer {your azure api key}" \ - -d '{ - "model": "gpt-3.5-turbo", - "messages": [{"role": "user", "content": "Hello!"}] - }' +``` +AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/ +AZURE_OPENAI_APIVERSION=2024-06-01 +AZURE_AI_STUDIO_DEPLOYMENTS=mistral-large-2407=Mistral-large2:swedencentral,llama-3.1-405B=Meta-Llama-3-1-405B-Instruct:northcentralus +AZURE_OPENAI_KEY_MISTRAL-LARGE-2407=your-api-key-1 +AZURE_OPENAI_KEY_LLAMA-3.1-405B=your-api-key-2 ``` -### 2. Used as forward proxy (i.e. an HTTP proxy) - -When accessing Azure OpenAI API through HTTP, it can be used directly as a proxy, but this tool does not have built-in HTTPS support, so you need an HTTPS proxy such as Nginx to support accessing HTTPS version of OpenAI API. - -Assuming that the proxy domain you configured is `https://{your-domain}.com`, you can execute the following commands in the terminal to use the https proxy: - -```shell -export https_proxy=https://{your-domain}.com +3. Uncomment the `env_file: .env` line in your `docker-compose.yml`. +4. Run `docker-compose up -d` to start the container with the environment variables from the .env file. -curl https://api.openai.com/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer {your azure api key}" \ - -d '{ - "model": "gpt-3.5-turbo", - "messages": [{"role": "user", "content": "Hello!"}] - }' -``` +### Running from GitHub Container Registry -Or configure it as an HTTP proxy in other open source Web ChatGPT projects: +To run the Azure OAI Proxy using the image from GitHub Container Registry: -``` -export HTTPS_PROXY=https://{your-domain}.com +```sh +docker run -d -p 11437:11437 \ + -e AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/ \ + -e AZURE_AI_STUDIO_DEPLOYMENTS=mistral-large-2407=Mistral-large2:swedencentral \ + -e AZURE_OPENAI_KEY_MISTRAL-LARGE-2407=your-api-key \ + ghcr.io/gyarbij/azure-oai-proxy:latest ``` -## Deploy +Replace the placeholder values with your actual Azure OpenAI configuration. 
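As a quick smoke test (assuming the port mapping shown above), you can ask the proxy for the model list it builds from your deployment before pointing a client at it:

```sh
curl http://localhost:11437/v1/models \
  -H "Authorization: Bearer your-azure-api-key"
```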
-Docker Normal Deployment +## Usage Examples -```shell -docker pull gyarbij/azure-oai-proxy:latest -docker run -p 11437:11437 --name=azure-oai-proxy \ - --env AZURE_OPENAI_ENDPOINT=https://{YOURENDPOINT}.openai.azure.com/ \ - gyarbij/azure-oai-proxy:latest -``` -Docker with custom deployment names - -```shell -docker pull gyarbij/azure-oai-proxy:latest -docker run -p 11437:11437 --name=azure-oai-proxy \ - --env AZURE_OPENAI_ENDPOINT=https://{YOURENDPOINT}.openai.azure.com/ \ - --env AZURE_OPENAI_MODEL_MAPPER=gpt-3.5-turbo=dev-g35-turbo,gpt-4=gpt-4ooo \ - gyarbij/azure-oai-proxy:latest -``` +### Calling the API -Calling +Once the proxy is running, you can call it using the OpenAI API format: -```shell -curl https://localhost:11437/v1/chat/completions \ +```sh +curl http://localhost:11437/v1/chat/completions \ -H "Content-Type: application/json" \ - -H "Authorization: Bearer {your azure api key}" \ + -H "Authorization: Bearer your-azure-api-key" \ -d '{ "model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "Hello!"}] }' ``` +For serverless deployments, use the model name as defined in your `AZURE_AI_STUDIO_DEPLOYMENTS` configuration. + ## Model Mapping Mechanism (Used for Custom deployment names) These are the default mappings for the most common models, if your Azure OpenAI deployment uses different names, you can set the `AZURE_OPENAI_MODEL_MAPPER` environment variable to define custom mappings.: @@ -173,7 +181,7 @@ These are the default mappings for the most common models, if your Azure OpenAI | `"gpt-4"` | `"gpt-4-0613"` | | `"gpt-4-32k"` | `"gpt-4-32k"` | | `"gpt-4-32k-0613"` | `"gpt-4-32k-0613"` | -| `"gpt-4o-mini"` | `"gpt-4o-mini"` | +| `"gpt-4o-mini"` | `"gpt-4o-mini-2024-07-18"` | | `"gpt-4o"` | `"gpt-4o"` | | `"gpt-4o-2024-05-13"` | `"gpt-4o-2024-05-13"` | | `"gpt-4-turbo"` | `"gpt-4-turbo"` | @@ -198,8 +206,16 @@ For custom fine-tuned models, the model name can be passed directly. For models | gpt-3.5-turbo | gpt-35-turbo-upgrade | | gpt-3.5-turbo-0301 | gpt-35-turbo-0301-fine-tuned | +## Important Notes + +- Always use HTTPS in production environments for secure communication. +- Regularly update the proxy to ensure compatibility with the latest Azure OpenAI API changes. +- Monitor your Azure OpenAI usage and costs, especially when using this proxy in high-traffic scenarios. + ## Recently Updated ++ 2024-07-25 Implemented support for Azure AI Studio deployments with support for Meta LLama 3.1, Mistral-2407 (mistral large 2), and other open models including from Cohere AI. ++ 2024-07-18 Added support for `gpt4o-mini`. + 2024-06-23 Implemented dynamic model fetching for `/v1/models endpoint`, replacing hardcoded model list. + 2024-06-23 Unified token handling mechanism across the application, improving consistency and security. + 2024-06-23 Added support for audio-related endpoints: `/v1/audio/speech`, `/v1/audio/transcriptions`, and `/v1/audio/translations`. @@ -216,11 +232,11 @@ For custom fine-tuned models, the model name can be passed directly. For models ## Contributing -We welcome contributions! Rest TBD. +Contributions are welcome! Please feel free to submit a Pull Request. ## License -MIT License +This project is licensed under the MIT License. 
## Disclaimer

From f183631c34aef34b882c8e06801a7a0b2a40a060 Mon Sep 17 00:00:00 2001
From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com>
Date: Thu, 25 Jul 2024 22:53:11 +0200
Subject: [PATCH 32/33] Updated README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 19b1bac..b102a24 100644
--- a/README.md
+++ b/README.md
@@ -215,7 +215,7 @@ For custom fine-tuned models, the model name can be passed directly. For models
 ## Recently Updated
 
 + 2024-07-25 Implemented support for Azure AI Studio deployments with support for Meta LLama 3.1, Mistral-2407 (mistral large 2), and other open models including from Cohere AI.
-+ 2024-07-18 Added support for `gpt-4o-mini`.
++ 2024-07-18 Added support for `gpt-4o-mini`.
 + 2024-06-23 Implemented dynamic model fetching for `/v1/models endpoint`, replacing hardcoded model list.
 + 2024-06-23 Unified token handling mechanism across the application, improving consistency and security.
 + 2024-06-23 Added support for audio-related endpoints: `/v1/audio/speech`, `/v1/audio/transcriptions`, and `/v1/audio/translations`.

From 0e798a19e1b718afbf6fbe098134b7f36a620da4 Mon Sep 17 00:00:00 2001
From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com>
Date: Thu, 25 Jul 2024 22:58:50 +0200
Subject: [PATCH 33/33] chore: Comment out the request-header log statement in
 makeDirector for production

---
 pkg/azure/proxy.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/azure/proxy.go b/pkg/azure/proxy.go
index 378caeb..4d93766 100644
--- a/pkg/azure/proxy.go
+++ b/pkg/azure/proxy.go
@@ -146,7 +146,7 @@ func makeDirector() func(*http.Request) {
 	}
 
 	log.Printf("Proxying request [%s] %s -> %s", model, originURL, req.URL.String())
-	log.Printf("Final request headers: %v", sanitizeHeaders(req.Header))
+	// log.Printf("Final request headers: %v", sanitizeHeaders(req.Header))
 	}
 }
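A side note on this last change: rather than commenting debug logs out before release, the same effect can be had by gating them behind an environment flag, so they can be re-enabled in the field without a rebuild. A minimal sketch for `pkg/azure` (the `AZURE_OPENAI_PROXY_DEBUG` variable is a hypothetical name, not one this patch set defines):

```go
package azure

import (
	"log"
	"os"
)

// debugLogging is resolved once at startup; AZURE_OPENAI_PROXY_DEBUG is a
// hypothetical flag name and is not defined anywhere in this patch set.
var debugLogging = os.Getenv("AZURE_OPENAI_PROXY_DEBUG") == "true"

// debugf forwards to log.Printf only when debug logging is enabled, so a
// call like debugf("Final request headers: %v", sanitizeHeaders(req.Header))
// can stay in the code without spamming production logs.
func debugf(format string, v ...interface{}) {
	if debugLogging {
		log.Printf(format, v...)
	}
}
```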