From 145b581780f149c412f88a21805f3c7dfb0491db Mon Sep 17 00:00:00 2001 From: Maksym Yezhov Date: Tue, 2 Jun 2026 21:55:29 -0700 Subject: [PATCH] feat: v2 - ai assistant - tangent researcher subagent --- src/agent/agents/runViewDispatcher.ts | 7 + .../agents/subagents/tangentResearcher.ts | 52 ++++++ src/agent/prompts/runViewDispatcher.md | 11 +- src/agent/prompts/tangentResearcher.md | 57 ++++++ .../AiChat/components/TangentScenario.tsx | 166 ++++++++++++++++++ .../AiChat/components/renderMarkdown.tsx | 8 + 6 files changed, 297 insertions(+), 4 deletions(-) create mode 100644 src/agent/agents/subagents/tangentResearcher.ts create mode 100644 src/agent/prompts/tangentResearcher.md create mode 100644 src/routes/v2/shared/components/AiChat/components/TangentScenario.tsx diff --git a/src/agent/agents/runViewDispatcher.ts b/src/agent/agents/runViewDispatcher.ts index d9b3469a5..82b641203 100644 --- a/src/agent/agents/runViewDispatcher.ts +++ b/src/agent/agents/runViewDispatcher.ts @@ -25,6 +25,7 @@ import { } from "./dispatcherRuntime"; import { createDebugAssistantAgent } from "./subagents/debugAssistant"; import { createGeneralHelpAgent } from "./subagents/generalHelp"; +import { createTangentResearcherAgent } from "./subagents/tangentResearcher"; function formatCurrentRunSection(context: AgentContext): string { if (context.mode !== "runView") { @@ -39,6 +40,7 @@ function formatCurrentRunSection(context: AgentContext): string { async function buildRunViewAgent(session: AgentSession): Promise { const generalHelp = createGeneralHelpAgent(session); const debugAssistant = createDebugAssistantAgent(session); + const tangentResearcher = createTangentResearcherAgent(session); const instructions = `${runViewDispatcherPrompt}\n\n${formatCurrentRunSection(session.context)}`; @@ -57,6 +59,11 @@ async function buildRunViewAgent(session: AgentSession): Promise { toolDescription: "Ask the debug-assistant specialist to inspect or explain a pipeline run from execution details, 
container state, and logs. Read-only — cannot edit the spec or submit runs. Input: a clear question that names the run id, e.g. 'Explain what run 12345 did and its outcome.' or 'Why did run 12345 fail?'.", }), + tangentResearcher.asTool({ + toolName: "create_optimization_scenario", + toolDescription: + "Ask the Tangent Researcher to analyze the current run for ML optimization potential and produce a 0-100 score plus prioritized hyperparameter-tuning and experiment ideas. Read-only. Input must name the run id, e.g. 'Analyze run 12345 for optimization opportunities.'.", + }), ], }); attachObservabilityHooks(agent, session.emitStatus); diff --git a/src/agent/agents/subagents/tangentResearcher.ts b/src/agent/agents/subagents/tangentResearcher.ts new file mode 100644 index 000000000..43c902c6b --- /dev/null +++ b/src/agent/agents/subagents/tangentResearcher.ts @@ -0,0 +1,52 @@ +/** + * Tangent Researcher sub-agent — scores a pipeline run for ML + * optimization potential and proposes prioritized tuning/experiment + * ideas. + * + * Read-only by design: it inspects run metadata and (optionally) the + * pipeline spec, but has no CSOM mutation or run-submission tools. Runs + * with high reasoning effort because scoring optimization potential is a + * judgment-heavy task. + * + * The session's `recentRuns` are appended to the system prompt at agent + * creation time (per turn) so the model can resolve "this run" / "the + * latest run" without an extra tool call. 
+ */ +import { Agent } from "@openai/agents"; + +import { requireOrchestratorModel } from "../../config"; +import { attachObservabilityHooks } from "../../middleware/observability"; +import tangentResearcherPrompt from "../../prompts/tangentResearcher.md?raw"; +import type { AgentSession, RecentPipelineRun } from "../../session"; +import { createCsomTools } from "../../tools/csomTools"; +import { createRunTools } from "../../tools/runTools"; + +const RECENT_RUNS_PROMPT_LIMIT = 5; + +function formatRecentRunsSection(runs: RecentPipelineRun[]): string { + if (runs.length === 0) return "## Recent runs\n\nNo recent runs available."; + const lines = runs.slice(0, RECENT_RUNS_PROMPT_LIMIT).map((run) => { + const status = run.status ? ` — status: ${run.status}` : ""; + return `- run ${run.id} (root execution ${run.root_execution_id}, created ${run.created_at})${status}`; + }); + return `## Recent runs\n\n${lines.join("\n")}`; +} + +export function createTangentResearcherAgent(session: AgentSession): Agent { + const csom = createCsomTools(session.bridge); + const runTools = createRunTools(session.bridge); + + const instructions = `${tangentResearcherPrompt}\n\n${formatRecentRunsSection(session.recentRuns)}`; + + const agent = new Agent({ + name: "tangent-researcher", + handoffDescription: `Score a pipeline run 0-100 for ML optimization potential and propose prioritized + hyperparameter-tuning and experiment ideas. 
Read-only — cannot edit the pipeline or submit runs.`, + instructions, + tools: [runTools.getRunStatus, csom.getPipelineState], + model: requireOrchestratorModel(), + modelSettings: { reasoning: { effort: "high" } }, + }); + attachObservabilityHooks(agent, session.emitStatus); + return agent; +} diff --git a/src/agent/prompts/runViewDispatcher.md b/src/agent/prompts/runViewDispatcher.md index 042883d2f..6a135b001 100644 --- a/src/agent/prompts/runViewDispatcher.md +++ b/src/agent/prompts/runViewDispatcher.md @@ -6,10 +6,11 @@ You are the **Tangle Run View Assistant**, the entry point for the AI assistant Each specialist is exposed to you as a tool. Calling a tool runs the specialist's own sub-agent loop and returns its final response as a string. -| Tool | When to call it | -| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `ask_general_help` | Any question about Tangle concepts, features, how things work, best practices, getting started, or documentation lookups (e.g. "what is a pipeline?", "what does this component do?", "what are subgraphs?"). Not specific to the current run. | -| `ask_debug_assistant` | Any request to inspect, diagnose, or **explain the current run** — "what did this run do?", "why did it fail?", "what went wrong with run 12345?", "show me the error", "explain the outcome". Read-only: it inspects execution details, container state, and logs only. 
| +| Tool | When to call it | +| ------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `ask_general_help` | Any question about Tangle concepts, features, how things work, best practices, getting started, or documentation lookups (e.g. "what is a pipeline?", "what does this component do?", "what are subgraphs?"). Not specific to the current run. | +| `ask_debug_assistant` | Any request to inspect, diagnose, or **explain the current run** — "what did this run do?", "why did it fail?", "what went wrong with run 12345?", "show me the error", "explain the outcome". Read-only: it inspects execution details, container state, and logs only. | +| `create_optimization_scenario` | Any request to **optimize, tune, or improve the model/pipeline** — "how can I optimize this?", "what hyperparameters should I tune?", "where can Tangent help?", "improve this model", "find experiment ideas". Read-only: it scores the run 0-100 for ML optimization potential and proposes prioritized ideas. | ## Calling a specialist @@ -29,6 +30,8 @@ When a specialist tool returns, **relay its response** to the user. Specialists Preserve those links exactly — do not rewrite them as bold, italic, or backticks, and do not invent ids. Return the tool output essentially as-is; you may add at most one short framing sentence if it genuinely helps, but the content must come from the specialist. Never announce or describe the tool calls themselves to the user. +When `create_optimization_scenario` returns a fenced ` ```tangent-scenario ` code block, **relay that block verbatim** — do not unwrap it, reformat it, summarize it, or add prose around it. The UI renders it into interactive idea cards. 
+ ## When NOT to call a tool - **Mutation/run requests** ("fix this", "rerun it", "change this input") — this view is read-only. Explain plainly that editing and running happen in the pipeline Editor, then offer to explain the run instead. diff --git a/src/agent/prompts/tangentResearcher.md b/src/agent/prompts/tangentResearcher.md new file mode 100644 index 000000000..6dc7752d2 --- /dev/null +++ b/src/agent/prompts/tangentResearcher.md @@ -0,0 +1,57 @@ +# Tangent Researcher — System Prompt + +You are the **Tangent Researcher**, an ML optimization expert who identifies hyperparameter tuning opportunities in Shopify's Tangle ML pipelines. Score pipelines 0-100 based on optimization potential: high scores for pipelines with manual grid search, no Bayesian optimization, unexplored hyperparameter space, stale tuning, or complex architectures with many knobs. + +You are **read-only** — you inspect a run, you never edit the pipeline or submit runs. + +## Your Workflow + +1. The run id you are analyzing is provided in your input. If the input says "the current run" / "this run" without a number, consult the **Recent runs** section appended below and pick the most recent entry. +2. Call `get_run_status(runId)` first to obtain the full run metadata and derived overall status. This is your highest-signal call — the returned `run` JSON is the basis for your scoring. +3. If you need to point at a specific task in the pipeline spec, call `get_pipeline_state` once. +4. Score the run for ML optimization potential and produce prioritized experiment ideas. + +## Scoring guidance + +Score 0-100 based on optimization potential. Award **high scores** for pipelines with: + +- Manual grid search (no automated search strategy). +- No Bayesian optimization. +- Large, unexplored hyperparameter space. +- Stale tuning (parameters look like defaults or haven't been revisited). +- Complex architectures with many tunable knobs. 
+ +Award **low scores** for pipelines that are already well-tuned, have a narrow hyperparameter surface, or where optimization would yield little. + +## Idea taxonomy + +Each idea MUST include an `ideaType` tag from this enum: + +- `feature_engineering`: adding/transforming/removing input features (cross-shop signals, interaction terms, embedding pooling, etc.). +- `hyperparameter_optimization`: tuning existing knobs (LR, schedule, alpha, temperature, batch size, depth). +- `input_data`: changing the training data (new label sources, sample mixes, negative mining, dataset filtering). +- `model_architecture`: structural changes (layer freezing, new heads, swapping backbones, capacity changes). + +When an idea spans two types, pick the one capturing the _primary_ change. + +## Response format + +Respond with a single fenced code block tagged `tangent-scenario` containing ONLY the JSON object (no other prose before or after it, no other markdown). The UI recognizes this block and renders each idea as a card. + +```tangent-scenario +{ + "score": <integer 0-100>, + "rationale": "<2 concise sentences explaining the opportunity score>", + "summary": "<2-3 paragraph analysis: what the pipeline does, where Tangent helps, what experiments to prioritize>", + "ideas": [ + { + "title": "<short idea title>", + "ideaType": "<one of the ideaType enum values above>", + "impact": "high|medium|low", + "evidence": "<1 sentence from the run data>" + } + ] +} +``` + +Do not wrap the block in additional commentary. The entire response is the fenced block. 
diff --git a/src/routes/v2/shared/components/AiChat/components/TangentScenario.tsx b/src/routes/v2/shared/components/AiChat/components/TangentScenario.tsx new file mode 100644 index 000000000..cf80352b5 --- /dev/null +++ b/src/routes/v2/shared/components/AiChat/components/TangentScenario.tsx @@ -0,0 +1,166 @@ +import { Badge } from "@/components/ui/badge"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import { BlockStack, InlineStack } from "@/components/ui/layout"; +import { Paragraph, Text } from "@/components/ui/typography"; + +type Impact = "high" | "medium" | "low"; + +type IdeaType = + | "feature_engineering" + | "hyperparameter_optimization" + | "input_data" + | "model_architecture"; + +interface ScenarioIdea { + title: string; + ideaType: IdeaType; + impact: Impact; + evidence: string; +} + +interface Scenario { + score: number; + rationale: string; + summary: string; + ideas: ScenarioIdea[]; +} + +const IDEA_TYPE_LABEL: Record = { + feature_engineering: "Feature engineering", + hyperparameter_optimization: "Hyperparameter optimization", + input_data: "Input data", + model_architecture: "Model architecture", +}; + +type BadgeVariant = "default" | "secondary" | "outline"; + +const IMPACT_VARIANT: Record = { + high: "default", + medium: "secondary", + low: "outline", +}; + +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null; +} + +function parseIdea(value: unknown): ScenarioIdea | null { + if (!isRecord(value)) return null; + const { title, ideaType, impact, evidence } = value; + if (typeof title !== "string") return null; + if ( + ideaType !== "feature_engineering" && + ideaType !== "hyperparameter_optimization" && + ideaType !== "input_data" && + ideaType !== "model_architecture" + ) { + return null; + } + if (impact !== "high" && impact !== "medium" && impact !== "low") return null; + if (typeof evidence !== "string") return null; + return { title, ideaType, impact, 
evidence }; +} + +function parseScenario(raw: string): Scenario | null { + let value: unknown; + try { + value = JSON.parse(raw); + } catch { + return null; + } + if (!isRecord(value)) return null; + const { score, rationale, summary, ideas } = value; + if (typeof score !== "number") return null; + if (typeof rationale !== "string") return null; + if (typeof summary !== "string") return null; + if (!Array.isArray(ideas)) return null; + + const parsedIdeas: ScenarioIdea[] = []; + for (const idea of ideas) { + const parsed = parseIdea(idea); + if (parsed) parsedIdeas.push(parsed); + } + + return { score, rationale, summary, ideas: parsedIdeas }; +} + +function scoreVariant(score: number): BadgeVariant { + if (score >= 70) return "default"; + if (score >= 40) return "secondary"; + return "outline"; +} + +function IdeaCard({ idea }: { idea: ScenarioIdea }) { + return ( + + + + + {idea.title} + + + + + {IDEA_TYPE_LABEL[idea.ideaType]} + + + {idea.impact} impact + + + + + + {idea.evidence} + + + + ); +} + +export function TangentScenario({ raw }: { raw: string }) { + const scenario = parseScenario(raw); + + if (!scenario) { + return ( + + {raw} + + ); + } + + return ( + + + + {scenario.score}/100 + + + Optimization potential + + + + + {scenario.rationale} + + + + {scenario.summary} + + + {scenario.ideas.length > 0 && ( + + + Ideas + + {scenario.ideas.map((idea, index) => ( + + ))} + + )} + + ); +} diff --git a/src/routes/v2/shared/components/AiChat/components/renderMarkdown.tsx b/src/routes/v2/shared/components/AiChat/components/renderMarkdown.tsx index 653a3a918..a8a11db4c 100644 --- a/src/routes/v2/shared/components/AiChat/components/renderMarkdown.tsx +++ b/src/routes/v2/shared/components/AiChat/components/renderMarkdown.tsx @@ -18,10 +18,13 @@ import { hydrateComponentReference } from "@/services/componentService"; import { ComponentChip } from "./ComponentChip"; import { EntityChip } from "./EntityChip"; +import { TangentScenario } from "./TangentScenario"; const 
ENTITY_PROTOCOL = "entity://"; const COMPONENT_PROTOCOL = "component://"; +const TANGENT_SCENARIO_CLASS = "language-tangent-scenario"; + const INLINE_CODE_CLASS = "rounded bg-muted px-1 py-0.5 text-xs font-mono"; const ComponentRefsContext = createContext< @@ -114,6 +117,11 @@ function MarkdownCode({ node: _node, ...rest }: ComponentProps<"code"> & { node?: unknown }) { + if (className?.includes(TANGENT_SCENARIO_CLASS)) { + const raw = String(children).replace(/\n$/, ""); + return ; + } + const match = className?.match(/language-(\w+)/); if (match) {