ArcadeAI · EricGustin · Dec 20, 2024 · Dec 18, 2024 · Dec 18, 2024 · Dec 19, 2024
diff --git a/.github/scripts/get_toolkits.sh b/.github/scripts/get_toolkits.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Get all directories and format as JSON array
+echo -n '['
+ls -d toolkits/*/ | cut -d'/' -f2 | sort -u | awk '{
+    if (NR==1) {
+        printf "\"%s\"", $0
+    } else {
+        printf ", \"%s\"", $0
+    }
+}'
+echo ']'
diff --git a/.github/workflows/publish-toolkit.yml b/.github/workflows/publish-toolkit.yml
@@ -39,15 +39,12 @@ jobs:
         python-version: '3.12'
         cache: 'pip'
 
-    - name: Install Python Dependencies
-      run: pip install ./arcade
-
     - name: Test Toolkit
       id: Test_Toolkit
       working-directory: toolkits/${{ steps.set-toolkit.outputs.toolkit }}
       run: |
-        make check
         make install
+        make check
         make test
 
     - name: Publish Toolkit

diff --git a/.github/workflows/test-toolkits.yml b/.github/workflows/test-toolkits.yml
@@ -0,0 +1,54 @@
+name: Test Toolkits
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+
+jobs:
+  setup:
+    runs-on: ubuntu-latest
+    outputs:
+      tool_matrix: ${{ steps.dataStep.outputs.tools }}  # JSON object of the form {"target": [...]}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Get Toolkits
+        id: dataStep
+        run: |
+          TARGETS=$(./.github/scripts/get_toolkits.sh)
+          echo "tools=$(jq -cn --argjson environments "$TARGETS" '{target: $environments}')" >> "$GITHUB_OUTPUT"
+
+  test-toolkits:
+    needs: setup
+    runs-on: ubuntu-latest
+    strategy:
+      matrix: ${{ fromJson(needs.setup.outputs.tool_matrix) }}  # one job per entry in "target"
+    steps:
+    - run: echo "${{ matrix.target }}"
+
+    - name: Checkout code
+      uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+
+    - name: Install Poetry
+      uses: snok/install-poetry@v1
+
+    - uses: actions/setup-python@v5
+      with:
+        python-version: '3.12'
+        cache: 'pip'
+
+    - name: Test Toolkit
+      id: Test_Toolkit
+      working-directory: toolkits/${{ matrix.target }}
+      run: |
+        make install
+        make check
+        make test
diff --git a/arcade/arcade/core/schema.py b/arcade/arcade/core/schema.py
@@ -234,6 +234,10 @@ class ToolContext(BaseModel):
     user_id: str | None = None
     """The user ID for the tool invocation (if any)."""
 
+    def get_auth_token_or_empty(self) -> str:
+        """Return the auth token, or "" if authorization or its token is missing or empty."""
+        return self.authorization.token if self.authorization and self.authorization.token else ""
+
 
 class ToolCallRequest(BaseModel):
     """The request to call (invoke) a tool."""

diff --git a/arcade/arcade/templates/{{ toolkit_name }}/Makefile b/arcade/arcade/templates/{{ toolkit_name }}/Makefile
@@ -13,11 +13,6 @@ install: ## Install the poetry environment and install the pre-commit hooks
 	else \
 		echo "📦 Poetry is already installed"; \
 	fi
-	@echo "📦 Checking for poetry.lock file"
-	@if [ ! -f poetry.lock ]; then \
-		echo "📦 Creating poetry.lock file"; \
-		poetry lock; \
-	fi
 	@echo "🚀 Installing package in development mode with all extras"
 	poetry install --all-extras
 
@@ -28,12 +23,8 @@ build: clean-build ## Build wheel file using poetry
 
 .PHONY: clean-build
 clean-build: ## clean build artifacts
-	rm -rf dist
-
-.PHONY: clean-dist
-clean-dist: ## Clean all built distributions
 	@echo "🗑️ Cleaning dist directory"
-	@rm -rf dist
+	rm -rf dist
 
 .PHONY: test
 test: ## Test the code with pytest
@@ -54,9 +45,9 @@ bump-version: ## Bump the version in the pyproject.toml file
 
 .PHONY: check
 check: ## Run code quality tools.
-	@echo "🚀 Checking Poetry lock file consistency with 'pyproject.toml': Running poetry check --lock"
-	@poetry check --lock
+	@echo "🚀 Checking Poetry lock file consistency with 'pyproject.toml': Running poetry check"
+	@poetry check
 	@echo "🚀 Linting code: Running pre-commit"
 	@poetry run pre-commit run -a
 	@echo "🚀 Static type checking: Running mypy"
-	@poetry run mypy $(git ls-files '*.py')
+	@poetry run mypy --config-file=pyproject.toml
diff --git a/arcade/arcade/templates/{{ toolkit_name }}/evals/eval_{{ toolkit_name }}.py b/arcade/arcade/templates/{{ toolkit_name }}/evals/eval_{{ toolkit_name }}.py
@@ -2,6 +2,7 @@
 from arcade.sdk.eval import (
     EvalRubric,
     EvalSuite,
+    ExpectedToolCall,
     SimilarityCritic,
     tool_eval,
 )
@@ -35,7 +36,12 @@ def {{ toolkit_name }}_eval_suite() -> EvalSuite:
     suite.add_case(
         name="Saying hello",
         user_message="He's actually right here, say hi to him!",
-        expected_tool_calls=[(say_hello, {"name": "John Doe"})],
+        expected_tool_calls=[
+            ExpectedToolCall(
+                func=say_hello,
+                args={"name": "John Doe"}
+            )
+        ],
         rubric=rubric,
         critics=[
             SimilarityCritic(critic_field="name", weight=0.5),

diff --git a/arcade/tests/core/test_schema.py b/arcade/tests/core/test_schema.py
@@ -0,0 +1,24 @@
+from arcade.core.schema import ToolAuthorizationContext, ToolContext
+
+
+def test_get_auth_token_or_empty_with_token():
+    expected_token = "test_token"  # noqa: S105
+    auth_context = ToolAuthorizationContext(token=expected_token)
+    tool_context = ToolContext(authorization=auth_context)
+
+    actual_token = tool_context.get_auth_token_or_empty()
+
+    assert actual_token == expected_token
+
+
+def test_get_auth_token_or_empty_without_token():
+    auth_context = ToolAuthorizationContext(token=None)
+    tool_context = ToolContext(authorization=auth_context)
+
+    assert tool_context.get_auth_token_or_empty() == ""
+
+
+def test_get_auth_token_or_empty_no_authorization():
+    tool_context = ToolContext(authorization=None)
+
+    assert tool_context.get_auth_token_or_empty() == ""
diff --git a/toolkits/code_sandbox/.pre-commit-config.yaml b/toolkits/code_sandbox/.pre-commit-config.yaml
@@ -0,0 +1,18 @@
+files: ^toolkits/code_sandbox/  # regex matched against paths relative to the git root
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: "v4.4.0"
+    hooks:
+      - id: check-case-conflict
+      - id: check-merge-conflict
+      - id: check-toml
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.6.7
+    hooks:
+      - id: ruff
+        args: [--fix]
+      - id: ruff-format
diff --git a/toolkits/code_sandbox/.ruff.toml b/toolkits/code_sandbox/.ruff.toml
@@ -0,0 +1,47 @@
+target-version = "py39"
+line-length = 100
+fix = true
+
+[lint]
+select = [
+    # flake8-2020
+    "YTT",
+    # flake8-bandit
+    "S",
+    # flake8-bugbear
+    "B",
+    # flake8-builtins
+    "A",
+    # flake8-comprehensions
+    "C4",
+    # flake8-debugger
+    "T10",
+    # flake8-simplify
+    "SIM",
+    # isort
+    "I",
+    # mccabe
+    "C90",
+    # pycodestyle
+    "E", "W",
+    # pyflakes
+    "F",
+    # pygrep-hooks
+    "PGH",
+    # pyupgrade
+    "UP",
+    # ruff
+    "RUF",
+    # tryceratops
+    "TRY",
+]
+
+[lint.per-file-ignores]
+"*" = ["TRY003", "B904"]
+"**/tests/*" = ["S101", "E501"]
+"**/evals/*" = ["S101", "E501"]
+
+
+[format]
+preview = true
+skip-magic-trailing-comma = false
diff --git a/toolkits/code_sandbox/LICENSE b/toolkits/code_sandbox/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024, Arcade AI
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/toolkits/code_sandbox/Makefile b/toolkits/code_sandbox/Makefile
@@ -0,0 +1,53 @@
+.PHONY: help
+
+help:
+	@echo "🛠️ code_sandbox Commands:\n"
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
+
+.PHONY: install
+install: ## Install Poetry if it is missing, then install the package with all extras
+	@echo "📦 Checking if Poetry is installed"
+	@if ! command -v poetry > /dev/null 2>&1; then \
+		echo "📦 Installing Poetry with pip"; \
+		pip install poetry; \
+	else \
+		echo "📦 Poetry is already installed"; \
+	fi
+	@echo "🚀 Installing package in development mode with all extras"
+	poetry install --all-extras
+
+.PHONY: build
+build: clean-build ## Build wheel file using poetry
+	@echo "🚀 Creating wheel file"
+	poetry build
+
+.PHONY: clean-build
+clean-build: ## clean build artifacts
+	@echo "🗑️ Cleaning dist directory"
+	rm -rf dist
+
+.PHONY: test
+test: ## Test the code with pytest
+	@echo "🚀 Testing code: Running pytest"
+	@poetry run pytest -W ignore -v --cov --cov-config=pyproject.toml --cov-report=xml
+
+.PHONY: coverage
+coverage: ## Generate coverage report (terminal summary and HTML) inside the Poetry environment
+	@echo "coverage report"
+	poetry run coverage report
+	@echo "Generating coverage report"
+	poetry run coverage html
+
+.PHONY: bump-version
+bump-version: ## Bump the version in the pyproject.toml file
+	@echo "🚀 Bumping version in pyproject.toml"
+	poetry version patch
+
+.PHONY: check
+check: ## Run code quality tools.
+	@echo "🚀 Checking Poetry lock file consistency with 'pyproject.toml': Running poetry check"
+	@poetry check
+	@echo "🚀 Linting code: Running pre-commit"
+	@poetry run pre-commit run -a
+	@echo "🚀 Static type checking: Running mypy"
+	@poetry run mypy --config-file=pyproject.toml
diff --git a/toolkits/code_sandbox/arcade_code_sandbox/tools/e2b.py b/toolkits/code_sandbox/arcade_code_sandbox/tools/e2b.py
@@ -1,8 +1,8 @@
 from typing import Annotated
 
+from arcade.sdk import tool
 from e2b_code_interpreter import Sandbox
 
-from arcade.sdk import tool
 from arcade_code_sandbox.tools.models import E2BSupportedLanguage
 from arcade_code_sandbox.tools.utils import get_secret
 
@@ -24,16 +24,18 @@ def run_code(
     with Sandbox(api_key=api_key) as sbx:
         execution = sbx.run_code(code=code, language=language)
 
-    return execution.to_json()
+    return str(execution.to_json())
 
 
-# Note: Not recommended to use tool_choice='generate' with this tool since it contains base64 encoded image.
+# Note: Using tool_choice='generate' with this tool is not recommended
+#       because its result contains a base64-encoded image.
 @tool
 def create_static_matplotlib_chart(
     code: Annotated[str, "The Python code to run"],
 ) -> Annotated[dict, "A dictionary with the following keys: base64_image, logs, error"]:
     """
-    Run the provided Python code to generate a static matplotlib chart. The resulting chart is returned as a base64 encoded image.
+    Run the provided Python code to generate a static matplotlib chart.
+    The resulting chart is returned as a base64 encoded image.
     """
     api_key = get_secret("E2B_API_KEY")
 

diff --git a/toolkits/code_sandbox/evals/eval_e2b.py b/toolkits/code_sandbox/evals/eval_e2b.py
@@ -1,7 +1,3 @@
-import arcade_code_sandbox
-from arcade_code_sandbox.tools.e2b import create_static_matplotlib_chart, run_code
-from arcade_code_sandbox.tools.models import E2BSupportedLanguage
-
 from arcade.sdk import ToolCatalog
 from arcade.sdk.eval import (
     BinaryCritic,
@@ -12,6 +8,10 @@
     tool_eval,
 )
 
+import arcade_code_sandbox
+from arcade_code_sandbox.tools.e2b import create_static_matplotlib_chart, run_code
+from arcade_code_sandbox.tools.models import E2BSupportedLanguage
+
 merge_sort_code = """
 def merge_sort(arr):
     if len(arr) <= 1: