diff --git a/skills/openrouter-images/SKILL.md b/skills/openrouter-images/SKILL.md index 6055b3a..e5cdd7b 100644 --- a/skills/openrouter-images/SKILL.md +++ b/skills/openrouter-images/SKILL.md @@ -36,8 +36,9 @@ Create a new image from a text prompt: ```bash cd /scripts && npx tsx generate.ts "a red panda wearing sunglasses" cd /scripts && npx tsx generate.ts "a futuristic cityscape at night" --aspect-ratio 16:9 -cd /scripts && npx tsx generate.ts "pixel art of a dragon" --output dragon.png +cd /scripts && npx tsx generate.ts "pixel art of a dragon" --output dragon cd /scripts && npx tsx generate.ts "a watercolor painting" --model google/gemini-2.5-flash-image +cd /scripts && npx tsx generate.ts "red logo on white" --model recraft-ai/recraft-v3-svg --rgb-colors "220,30,30" ``` ### Options @@ -45,9 +46,14 @@ cd /scripts && npx tsx generate.ts "a watercolor painting" --model g | Flag | Description | Default | |---|---|---| | `--model ` | OpenRouter model ID | `google/gemini-3.1-flash-image-preview` | -| `--output ` | Output file path | `image-YYYYMMDD-HHmmss.png` | +| `--output ` | Output path stem (extension auto-derived from MIME type) | `image-YYYYMMDD-HHmmss` | | `--aspect-ratio ` | Aspect ratio (e.g. `16:9`, `1:1`, `4:3`) | Model default | | `--image-size ` | Image size (e.g. `1K`, `2K`) | Model default | +| `--rgb-colors ` | Semicolon-separated RGB palette, e.g. `255,0,0;0,128,0` (Recraft) | — | +| `--background-rgb-color ` | Background color as `r,g,b` (Recraft) | — | +| `--strength <0-1>` | Influence strength for style/color transfer (Recraft) | — | + +**Output extension:** The file extension (`.png`, `.jpg`, `.webp`, `.svg`, etc.) is derived automatically from the MIME type returned by the model. If you pass `--output dragon`, the saved file might be `dragon.png` or `dragon.svg` depending on the model. 
## Edit Image @@ -55,8 +61,9 @@ Modify an existing image with a text prompt: ```bash cd /scripts && npx tsx edit.ts photo.png "make the sky purple" -cd /scripts && npx tsx edit.ts avatar.jpg "add a party hat" --output avatar-hat.png +cd /scripts && npx tsx edit.ts avatar.jpg "add a party hat" --output avatar-hat cd /scripts && npx tsx edit.ts scene.png "convert to watercolor style" --model google/gemini-2.5-flash-image +cd /scripts && npx tsx edit.ts logo.png "recolor in red palette" --rgb-colors "220,30,30;180,20,20" ``` ### Options @@ -64,9 +71,12 @@ cd /scripts && npx tsx edit.ts scene.png "convert to watercolor styl | Flag | Description | Default | |---|---|---| | `--model ` | OpenRouter model ID | `google/gemini-3.1-flash-image-preview` | -| `--output ` | Output file path | `image-YYYYMMDD-HHmmss.png` | +| `--output ` | Output path stem (extension auto-derived from MIME type) | `image-YYYYMMDD-HHmmss` | | `--aspect-ratio ` | Aspect ratio (e.g. `16:9`, `1:1`, `4:3`) | Model default | | `--image-size ` | Image size (e.g. `1K`, `2K`) | Model default | +| `--rgb-colors ` | Semicolon-separated RGB palette, e.g. `255,0,0;0,128,0` (Recraft) | — | +| `--background-rgb-color ` | Background color as `r,g,b` (Recraft) | — | +| `--strength <0-1>` | Influence strength for style/color transfer (Recraft) | — | Supported input formats: `.png`, `.jpg`, `.jpeg`, `.webp`, `.gif` @@ -97,20 +107,16 @@ Supported input formats: `.png`, `.jpg`, `.jpeg`, `.webp`, `.gif` ## API Response Shapes -Image generation uses `POST /api/v1/responses` with `modalities: ["image", "text"]`. See the [Responses API reference](https://openrouter.ai/docs/api/reference/responses/overview) and [image generation guide](https://openrouter.ai/docs/guides/overview/multimodal/image-generation) for full request details. +Image generation uses `POST /api/v1/chat/completions`. Google models require `modalities: ["image", "text"]`; other models (Recraft, DALL-E, etc.) must omit `modalities` to avoid a 404. 
-The image-specific output item type is `image_generation_call` — this is not obvious from the general Responses API docs: +Images are extracted from four possible response shapes, tried in order: -```json -{ - "type": "image_generation_call", - "id": "imagegen-abc123", - "status": "completed", - "result": "" -} -``` +1. **OpenRouter extension** — `choices[0].message.images[]` (string array) +2. **Responses API items** — `output[].type == "image_generation_call"` with `status == "completed"` +3. **DALL-E / native** — `data[].url` or `data[].b64_json` +4. **Content array** — `choices[0].message.content[].type == "image_url"` -This appears alongside standard `message` output items in the `output` array. Text and image outputs may each be absent depending on the model and prompt. +The saved file extension (`.png`, `.jpg`, `.webp`, `.svg`, etc.) is derived from the MIME type in the response — either the `content-type` header (for HTTP URL images) or the `data:` URL prefix (for base64 images). 
## Using a Different Model diff --git a/skills/openrouter-images/scripts/edit.ts b/skills/openrouter-images/scripts/edit.ts index 37199b1..4fc2714 100644 --- a/skills/openrouter-images/scripts/edit.ts +++ b/skills/openrouter-images/scripts/edit.ts @@ -1,9 +1,13 @@ +import { extname } from "node:path"; import { DEFAULT_MODEL, requireApiKey, parseArgs, + parseRgbColors, + parseRgbTriplet, postChatCompletion, readImageAsDataUrl, + extractImages, saveImage, defaultOutputPath, } from "./lib.js"; @@ -15,7 +19,12 @@ const imagePath = args.get("_0") as string | undefined; const prompt = args.get("_1") as string | undefined; if (!imagePath || !prompt) { - console.error("Usage: npx tsx edit.ts \"prompt\" [--model ] [--output ] [--aspect-ratio ] [--image-size ]"); + console.error( + "Usage: npx tsx edit.ts \"prompt\" [--model ] [--output ]\n" + + " [--aspect-ratio ] [--image-size ]\n" + + " [--rgb-colors \"r,g,b[;r,g,b...]\"] [--background-rgb-color \"r,g,b\"]\n" + + " [--strength <0-1>]" + ); process.exit(1); } @@ -23,12 +32,25 @@ const model = (args.get("model") as string) || DEFAULT_MODEL; const outputBase = (args.get("output") as string) || defaultOutputPath(); const aspectRatio = args.get("aspect-ratio") as string | undefined; const imageSize = args.get("image-size") as string | undefined; +const rgbColorsRaw = args.get("rgb-colors") as string | undefined; +const bgColorRaw = args.get("background-rgb-color") as string | undefined; +const strengthRaw = args.get("strength") as string | undefined; const dataUrl = readImageAsDataUrl(imagePath as string); -const imageConfig: Record = {}; +const imageConfig: Record = {}; if (aspectRatio) imageConfig.aspect_ratio = aspectRatio; if (imageSize) imageConfig.image_size = imageSize; +if (rgbColorsRaw) imageConfig.rgb_colors = parseRgbColors(rgbColorsRaw); +if (bgColorRaw) imageConfig.background_rgb_color = parseRgbTriplet(bgColorRaw); +if (strengthRaw) { + const s = parseFloat(strengthRaw); + if (isNaN(s) || s < 0 || s > 1) { + 
console.error("Error: --strength must be a number between 0 and 1."); + process.exit(1); + } + imageConfig.strength = s; +} const body: any = { model, @@ -41,41 +63,44 @@ const body: any = { ], }, ], - modalities: ["image", "text"], + // Recraft and other non-Google models reject modalities:"image","text" with a 404. + // Google models require it for image output. + ...(model.startsWith("google/") ? { modalities: ["image", "text"] } : {}), ...(Object.keys(imageConfig).length > 0 ? { image_config: imageConfig } : {}), }; const json = await postChatCompletion(apiKey, body); -const message = json.choices?.[0]?.message; - -if (!message) { - console.error("Error: No response from model."); - process.exit(1); -} -if (message.content) { - console.error(`Model: ${message.content}`); +const textContent = json.choices?.[0]?.message?.content; +if (textContent) { + console.error(`Model: ${textContent}`); } -const images: string[] = message.images ?? []; +const images = extractImages(json); if (images.length === 0) { console.error("Error: No images returned by model."); + console.error("Response:", JSON.stringify(json, null, 2)); process.exit(1); } const saved: string[] = []; for (let i = 0; i < images.length; i++) { - const img = images[i].startsWith("data:") ? images[i] : `data:image/png;base64,${images[i]}`; + const raw = images[i]; + // Normalise: pass data: and https: URLs as-is; wrap bare base64 as PNG. + const imgData = + raw.startsWith("data:") || raw.startsWith("http://") || raw.startsWith("https://") + ? raw + : `data:image/png;base64,${raw}`; + let outPath: string; if (images.length === 1) { outPath = outputBase; } else { - const dotIdx = outputBase.lastIndexOf("."); - const base = dotIdx > 0 ? outputBase.slice(0, dotIdx) : outputBase; - const ext = dotIdx > 0 ? outputBase.slice(dotIdx) : ".png"; - outPath = `${base}-${i + 1}${ext}`; + const currentExt = extname(outputBase); + const base = currentExt ? 
outputBase.slice(0, -currentExt.length) : outputBase; + outPath = `${base}-${i + 1}`; } - const abs = saveImage(img, outPath); + const abs = await saveImage(imgData, outPath); saved.push(abs); } diff --git a/skills/openrouter-images/scripts/generate.ts b/skills/openrouter-images/scripts/generate.ts index 50e7f72..5a86a53 100644 --- a/skills/openrouter-images/scripts/generate.ts +++ b/skills/openrouter-images/scripts/generate.ts @@ -1,8 +1,12 @@ +import { extname } from "node:path"; import { DEFAULT_MODEL, requireApiKey, parseArgs, + parseRgbColors, + parseRgbTriplet, postChatCompletion, + extractImages, saveImage, defaultOutputPath, } from "./lib.js"; @@ -12,7 +16,12 @@ const args = parseArgs(process.argv.slice(2)); const prompt = args.get("_0") as string | undefined; if (!prompt) { - console.error("Usage: npx tsx generate.ts \"prompt\" [--model ] [--output ] [--aspect-ratio ] [--image-size ]"); + console.error( + "Usage: npx tsx generate.ts \"prompt\" [--model ] [--output ]\n" + + " [--aspect-ratio ] [--image-size ]\n" + + " [--rgb-colors \"r,g,b[;r,g,b...]\"] [--background-rgb-color \"r,g,b\"]\n" + + " [--strength <0-1>]" + ); process.exit(1); } @@ -20,49 +29,65 @@ const model = (args.get("model") as string) || DEFAULT_MODEL; const outputBase = (args.get("output") as string) || defaultOutputPath(); const aspectRatio = args.get("aspect-ratio") as string | undefined; const imageSize = args.get("image-size") as string | undefined; +const rgbColorsRaw = args.get("rgb-colors") as string | undefined; +const bgColorRaw = args.get("background-rgb-color") as string | undefined; +const strengthRaw = args.get("strength") as string | undefined; -const imageConfig: Record = {}; +const imageConfig: Record = {}; if (aspectRatio) imageConfig.aspect_ratio = aspectRatio; if (imageSize) imageConfig.image_size = imageSize; +if (rgbColorsRaw) imageConfig.rgb_colors = parseRgbColors(rgbColorsRaw); +if (bgColorRaw) imageConfig.background_rgb_color = parseRgbTriplet(bgColorRaw); +if 
(strengthRaw) { + const s = parseFloat(strengthRaw); + if (isNaN(s) || s < 0 || s > 1) { + console.error("Error: --strength must be a number between 0 and 1."); + process.exit(1); + } + imageConfig.strength = s; +} const body: any = { model, messages: [{ role: "user", content: prompt }], - modalities: ["image", "text"], + // Recraft and other non-Google models reject modalities:"image","text" with a 404. + // Google models require it for image output. + ...(model.startsWith("google/") ? { modalities: ["image", "text"] } : {}), ...(Object.keys(imageConfig).length > 0 ? { image_config: imageConfig } : {}), }; const json = await postChatCompletion(apiKey, body); -const message = json.choices?.[0]?.message; - -if (!message) { - console.error("Error: No response from model."); - process.exit(1); -} -if (message.content) { - console.error(`Model: ${message.content}`); +const textContent = json.choices?.[0]?.message?.content; +if (textContent) { + console.error(`Model: ${textContent}`); } -const images: string[] = message.images ?? []; +const images = extractImages(json); if (images.length === 0) { console.error("Error: No images returned by model."); + console.error("Response:", JSON.stringify(json, null, 2)); process.exit(1); } const saved: string[] = []; for (let i = 0; i < images.length; i++) { - const dataUrl = images[i].startsWith("data:") ? images[i] : `data:image/png;base64,${images[i]}`; + const raw = images[i]; + // Normalise: pass data: and https: URLs as-is; wrap bare base64 as PNG. + const imgData = + raw.startsWith("data:") || raw.startsWith("http://") || raw.startsWith("https://") + ? raw + : `data:image/png;base64,${raw}`; + let outPath: string; if (images.length === 1) { outPath = outputBase; } else { - const dotIdx = outputBase.lastIndexOf("."); - const base = dotIdx > 0 ? outputBase.slice(0, dotIdx) : outputBase; - const ext = dotIdx > 0 ? 
outputBase.slice(dotIdx) : ".png"; - outPath = `${base}-${i + 1}${ext}`; + const currentExt = extname(outputBase); + const base = currentExt ? outputBase.slice(0, -currentExt.length) : outputBase; + outPath = `${base}-${i + 1}`; } - const abs = saveImage(dataUrl, outPath); + const abs = await saveImage(imgData, outPath); saved.push(abs); } diff --git a/skills/openrouter-images/scripts/lib.ts b/skills/openrouter-images/scripts/lib.ts index 143aea4..8dbede4 100644 --- a/skills/openrouter-images/scripts/lib.ts +++ b/skills/openrouter-images/scripts/lib.ts @@ -37,17 +37,37 @@ export function parseArgs(argv: string[]): Map { export async function postChatCompletion(apiKey: string, body: any): Promise { const url = "https://openrouter.ai/api/v1/chat/completions"; - const res = await fetch(url, { - method: "POST", - headers: { - Authorization: `Bearer ${apiKey}`, - "Content-Type": "application/json", - }, - body: JSON.stringify(body), - }); - - if (!res.ok) { + const MAX_RETRIES = 3; + const BASE_DELAY_MS = 2000; + + for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) { + const res = await fetch(url, { + method: "POST", + headers: { + Authorization: `Bearer ${apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify(body), + }); + + if (res.ok) { + return res.json(); + } + const text = await res.text().catch(() => ""); + + if ((res.status === 429 || res.status >= 500) && attempt < MAX_RETRIES) { + const retryAfterRaw = res.headers.get("retry-after"); + const delay = retryAfterRaw + ? parseInt(retryAfterRaw, 10) * 1000 + : BASE_DELAY_MS * attempt; + console.error( + `Warning: HTTP ${res.status}, retrying in ${delay}ms (attempt ${attempt}/${MAX_RETRIES})...` + ); + await new Promise((r) => setTimeout(r, delay)); + continue; + } + switch (res.status) { case 401: console.error("Error 401: Invalid API key. 
Check your OPENROUTER_API_KEY."); @@ -61,7 +81,48 @@ export async function postChatCompletion(apiKey: string, body: any): Promise 0) return msgImages as string[]; + + const outputs = json.output; + if (Array.isArray(outputs)) { + const imgs = outputs + .filter( + (o: any) => + o.type === "image_generation_call" && o.status === "completed" && o.result + ) + .map((o: any) => o.result as string); + if (imgs.length > 0) return imgs; + } + + if (Array.isArray(json.data) && json.data.length > 0) { + return json.data + .map((d: any) => { + if (d.b64_json) return `data:image/png;base64,${d.b64_json}`; + if (d.url) return d.url as string; + return null; + }) + .filter(Boolean) as string[]; + } + + const content = json.choices?.[0]?.message?.content; + if (Array.isArray(content)) { + const urls = content + .filter((c: any) => c.type === "image_url" && c.image_url?.url) + .map((c: any) => c.image_url.url as string); + if (urls.length > 0) return urls; + } + + return []; } const MIME_MAP: Record = { @@ -72,6 +133,14 @@ const MIME_MAP: Record = { ".gif": "image/gif", }; +const MIME_TO_EXT: Record = { + "image/png": ".png", + "image/jpeg": ".jpg", + "image/webp": ".webp", + "image/gif": ".gif", + "image/svg+xml": ".svg", +}; + export function readImageAsDataUrl(filePath: string): string { const abs = resolve(filePath); const ext = extname(abs).toLowerCase(); @@ -84,22 +153,76 @@ export function readImageAsDataUrl(filePath: string): string { return `data:${mime};base64,${data.toString("base64")}`; } -export function saveImage(dataUrl: string, outputPath: string): string { - const match = dataUrl.match(/^data:[^;]+;base64,(.+)$/); - if (!match) { - console.error("Error: Invalid data URL format in response."); - process.exit(1); +// Replace or add the correct extension on outputPath based on MIME type. +// Uses path.extname() so dots in parent directory names (e.g. `..`) are ignored. 
export function pathWithMimeExt(outputPath: string, mime: string): string {
  // MIME types are case-insensitive; unknown or empty types fall back to PNG.
  const ext = MIME_TO_EXT[mime.trim().toLowerCase()] ?? ".png";

  // Only a trailing extension that is a recognised image extension (or a bare
  // trailing dot) is replaced. A stem such as "logo.v2" or "my.brand" keeps its
  // full name ("logo.v2.png") instead of being truncated at the last dot.
  const replaceable = new Set<string>([...Object.values(MIME_TO_EXT), ".jpeg", "."]);
  const currentExt = extname(outputPath);
  const stem = replaceable.has(currentExt.toLowerCase())
    ? outputPath.slice(0, -currentExt.length)
    : outputPath;
  return stem + ext;
}

/**
 * Save an image given either an HTTP(S) URL or a base64 `data:` URL.
 *
 * The file extension is derived from the MIME type (the `content-type` header
 * for HTTP downloads, the media type of the `data:` URL otherwise) and applied
 * to `outputPath` via `pathWithMimeExt`.
 *
 * Prints an error and exits the process with status 1 when the download fails
 * or the input is neither an HTTP(S) URL nor a base64 `data:` URL.
 *
 * @param rawImage   `http://`, `https://`, or `data:<mime>[;param...];base64,<payload>` string.
 * @param outputPath Destination path; a known image extension on it is replaced.
 * @returns The absolute path that was actually written.
 */
export async function saveImage(rawImage: string, outputPath: string): Promise<string> {
  let buffer: Buffer;
  let mime: string;

  if (rawImage.startsWith("http://") || rawImage.startsWith("https://")) {
    const res = await fetch(rawImage);
    if (!res.ok) {
      console.error(`Error: Failed to download image from URL: HTTP ${res.status}`);
      process.exit(1);
    }
    const ct = res.headers.get("content-type") ?? "image/png";
    // Drop parameters such as "; charset=utf-8".
    mime = (ct.split(";")[0] ?? "").trim();
    buffer = Buffer.from(await res.arrayBuffer());
  } else {
    // Accept optional media-type parameters before ";base64" (e.g.
    // "data:image/svg+xml;charset=utf-8;base64,...") and payloads that contain
    // line breaks (the `s` flag lets `.` match them; the base64 decoder skips
    // whitespace).
    const match = /^data:([^;,]+)(?:;[^;,]+)*;base64,(.+)$/s.exec(rawImage);
    const dataMime = match?.[1];
    const payload = match?.[2];
    if (dataMime === undefined || payload === undefined) {
      console.error("Error: Invalid image data: expected a data: URL or https:// URL.");
      process.exit(1);
    }
    mime = dataMime;
    buffer = Buffer.from(payload, "base64");
  }

  const abs = resolve(pathWithMimeExt(outputPath, mime));
  writeFileSync(abs, buffer);
  return abs;
}

// Returns a timestamped base path without extension.
// saveImage will add the correct extension based on the returned MIME type.
export function defaultOutputPath(): string {
  // Local-time stamp in the form YYYYMMDD-HHmmss. No extension is appended
  // here; the caller is expected to add one once the image type is known.
  const now = new Date();
  const pad = (n: number) => String(n).padStart(2, "0");
  const stamp =
    `${now.getFullYear()}${pad(now.getMonth() + 1)}${pad(now.getDate())}` +
    `-${pad(now.getHours())}${pad(now.getMinutes())}${pad(now.getSeconds())}`;
  return `image-${stamp}`;
}

/**
 * Parse a single "r,g,b" triplet into a `[r, g, b]` tuple.
 *
 * Each channel must be a plain decimal integer in 0-255; whitespace around the
 * channels is ignored. Values such as "12px", "1e2", "1.5" or "0x10" are
 * rejected rather than being silently truncated.
 *
 * Prints an error and exits the process with status 1 on invalid input.
 *
 * @param s Triplet text, e.g. "255,0,128".
 * @returns The three channel values in r, g, b order.
 */
export function parseRgbTriplet(s: string): [number, number, number] {
  const parts = s.trim().split(",");
  if (parts.length !== 3) {
    console.error(`Error: Invalid RGB triplet "${s}". Expected format: "r,g,b" (e.g. "255,0,128").`);
    process.exit(1);
  }
  const vals = parts.map((p) => {
    const token = p.trim();
    // Digits only: parseInt would accept a numeric prefix ("12px" -> 12).
    const n = /^\d+$/.test(token) ? Number(token) : NaN;
    if (Number.isNaN(n) || n > 255) {
      console.error(`Error: Invalid RGB value "${token}". Expected an integer 0-255.`);
      process.exit(1);
    }
    return n;
  });
  // Length was verified above, so the array is exactly three numbers long.
  return vals as [number, number, number];
}

/**
 * Parse a semicolon-separated list of RGB triplets, e.g. "255,0,0;0,255,0".
 *
 * Empty segments (such as the one produced by a trailing ";") are ignored.
 * Prints an error and exits the process with status 1 when no triplet remains
 * or any triplet is invalid.
 *
 * @param s Palette text.
 * @returns One `[r, g, b]` tuple per triplet, in input order.
 */
export function parseRgbColors(s: string): Array<[number, number, number]> {
  const segments = s.split(";").filter((t) => t.trim() !== "");
  if (segments.length === 0) {
    console.error(`Error: No RGB triplets found in "${s}". Expected format: "r,g,b[;r,g,b...]".`);
    process.exit(1);
  }
  return segments.map((t) => parseRgbTriplet(t));
}