diff --git a/README.md b/README.md index bcc079f..19a1ddf 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,8 @@ A image creator based on **free** `Cloudflare AI` and `HuggingFace` APIs. Featur You can use either `Fullstack` or `Client-Server` mode. +> You may need to initialize the `Cloudflare AI` `llama3.2 11B vision` model before using the `Image-to-Prompt` feature. See [here](https://developers.cloudflare.com/workers-ai/models/llama-3.2-11b-vision-instruct/#Input) for more information. + #### 1.1.1 Fullstack Set following environment variables in `.env` file or `Vercel`. diff --git a/app/api/prompt/route.ts b/app/api/prompt/route.ts index f092738..0f39bc2 100644 --- a/app/api/prompt/route.ts +++ b/app/api/prompt/route.ts @@ -1,11 +1,11 @@ export async function POST(req: Request): Promise { try { const { image } = await req.json() - const url = `https://api.cloudflare.com/client/v4/accounts/${process.env.CF_USER_ID}/ai/run/@cf/unum/uform-gen2-qwen-500m` + const url = `https://api.cloudflare.com/client/v4/accounts/${process.env.CF_USER_ID}/ai/run/@cf/meta/llama-3.2-11b-vision-instruct` const body = { image: image as number[], max_tokens: 4096, - prompt: 'Generate a detailed description in a single paragraph for this image', + prompt: 'Analyze the given image and provide a detailed description. Include details about the main subject/people, background, colors, composition, and mood. 
Ensure the description is vivid and suitable for input into a text-to-image generation model.', } const response = await fetch(url, { method: 'POST', diff --git a/app/components/Prompt.tsx b/app/components/Prompt.tsx index 9fa59b9..5ab9813 100644 --- a/app/components/Prompt.tsx +++ b/app/components/Prompt.tsx @@ -86,7 +86,7 @@ export default function Prompt() { showUploadList={false} accept='.jpg,.jpeg,.png' beforeUpload={async (file) => { - const MAX_SIZE_MB = 2 + const MAX_SIZE_MB = 5 try { flushSync(() => setDisabled(true)) if (file.size > MAX_SIZE_MB * 1024 * 1024) { @@ -96,7 +96,7 @@ export default function Prompt() { const uint8array = new Uint8Array(await file.arrayBuffer()) let res: Response | undefined if (process.env.NEXT_PUBLIC_WORKERS_SERVER) { - res = await fetch(`${process.env.NEXT_PUBLIC_WORKERS_SERVER}/painter/genprompt`, { + res = await fetch(`${process.env.NEXT_PUBLIC_WORKERS_SERVER}/painter/genprompt/v4`, { method: 'POST', body: JSON.stringify({ image: Array.from(uint8array) }) }) @@ -111,7 +111,7 @@ export default function Prompt() { return false } const data = await res.json() - const prompt = data.result.description as string + const prompt = data.result.response as string form.setFieldsValue({ prompt }) return false } finally {