Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/agent/agents/runViewDispatcher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import {
} from "./dispatcherRuntime";
import { createDebugAssistantAgent } from "./subagents/debugAssistant";
import { createGeneralHelpAgent } from "./subagents/generalHelp";
import { createTangentResearcherAgent } from "./subagents/tangentResearcher";

function formatCurrentRunSection(context: AgentContext): string {
if (context.mode !== "runView") {
Expand All @@ -39,6 +40,7 @@ function formatCurrentRunSection(context: AgentContext): string {
async function buildRunViewAgent(session: AgentSession): Promise<Agent> {
const generalHelp = createGeneralHelpAgent(session);
const debugAssistant = createDebugAssistantAgent(session);
const tangentResearcher = createTangentResearcherAgent(session);

const instructions = `${runViewDispatcherPrompt}\n\n${formatCurrentRunSection(session.context)}`;

Expand All @@ -57,6 +59,11 @@ async function buildRunViewAgent(session: AgentSession): Promise<Agent> {
toolDescription:
"Ask the debug-assistant specialist to inspect or explain a pipeline run from execution details, container state, and logs. Read-only — cannot edit the spec or submit runs. Input: a clear question that names the run id, e.g. 'Explain what run 12345 did and its outcome.' or 'Why did run 12345 fail?'.",
}),
tangentResearcher.asTool({
toolName: "create_optimization_scenario",
toolDescription:
"Ask the Tangent Researcher to analyze the current run for ML optimization potential and produce a 0-100 score plus prioritized hyperparameter-tuning and experiment ideas. Read-only. Input must name the run id, e.g. 'Analyze run 12345 for optimization opportunities.'.",
}),
],
});
attachObservabilityHooks(agent, session.emitStatus);
Expand Down
52 changes: 52 additions & 0 deletions src/agent/agents/subagents/tangentResearcher.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/**
* Tangent Researcher sub-agent — scores a pipeline run for ML
* optimization potential and proposes prioritized tuning/experiment
* ideas.
*
* Read-only by design: it inspects run metadata and (optionally) the
* pipeline spec, but has no CSOM mutation or run-submission tools. Runs
* with high reasoning effort because scoring optimization potential is a
* judgment-heavy task.
*
* The session's `recentRuns` are appended to the system prompt at agent
* creation time (per turn) so the model can resolve "this run" / "the
* latest run" without an extra tool call.
*/
import { Agent } from "@openai/agents";

import { requireOrchestratorModel } from "../../config";

Check failure on line 17 in src/agent/agents/subagents/tangentResearcher.ts

View workflow job for this annotation

GitHub Actions / Type checking

Module '"../../config"' has no exported member 'requireOrchestratorModel'.

Check failure on line 17 in src/agent/agents/subagents/tangentResearcher.ts

View workflow job for this annotation

GitHub Actions / Type checking

Module '"../../config"' has no exported member 'requireOrchestratorModel'.
import { attachObservabilityHooks } from "../../middleware/observability";
import tangentResearcherPrompt from "../../prompts/tangentResearcher.md?raw";
import type { AgentSession, RecentPipelineRun } from "../../session";
import { createCsomTools } from "../../tools/csomTools";
import { createRunTools } from "../../tools/runTools";

/** Upper bound on how many recent runs are listed in the prompt section. */
const RECENT_RUNS_PROMPT_LIMIT = 5;

/**
 * Renders the "Recent runs" markdown section that is appended to the
 * agent's instructions.
 *
 * @param runs - Recent runs; only the first `RECENT_RUNS_PROMPT_LIMIT`
 *   entries are listed, in the order given.
 * @returns A markdown heading followed by one bullet per run, or a
 *   placeholder sentence when `runs` is empty.
 */
function formatRecentRunsSection(runs: RecentPipelineRun[]): string {
  if (runs.length === 0) {
    return "## Recent runs\n\nNo recent runs available.";
  }

  const bullets: string[] = [];
  for (const run of runs.slice(0, RECENT_RUNS_PROMPT_LIMIT)) {
    // The status suffix is omitted entirely when the run has no (truthy) status.
    let status = "";
    if (run.status) {
      status = ` — status: ${run.status}`;
    }
    bullets.push(
      `- run ${run.id} (root execution ${run.root_execution_id}, created ${run.created_at})${status}`,
    );
  }

  return `## Recent runs\n\n${bullets.join("\n")}`;
}

/**
 * Builds the Tangent Researcher sub-agent for the given session.
 *
 * The instructions are the static Tangent Researcher prompt followed by a
 * "Recent runs" section rendered from `session.recentRuns`. The agent is
 * given exactly two tools, `runTools.getRunStatus` and
 * `csom.getPipelineState`, and is configured with high reasoning effort.
 *
 * @param session - Supplies the bridge used to create the tools, the
 *   recent runs embedded in the instructions, and the `emitStatus`
 *   callback passed to the observability hooks.
 * @returns The configured agent with observability hooks attached.
 */
export function createTangentResearcherAgent(session: AgentSession): Agent {
  const csom = createCsomTools(session.bridge);
  const runTools = createRunTools(session.bridge);

  const instructions = `${tangentResearcherPrompt}\n\n${formatRecentRunsSection(session.recentRuns)}`;

  const agent = new Agent({
    name: "tangent-researcher",
    // Built from single-line pieces on purpose: a template literal that
    // spans source lines embeds the line break (and any indentation) in the
    // description text itself.
    handoffDescription:
      "Score a pipeline run 0-100 for ML optimization potential and propose prioritized " +
      "hyperparameter-tuning and experiment ideas. Read-only — cannot edit the pipeline or submit runs.",
    instructions,
    tools: [runTools.getRunStatus, csom.getPipelineState],
    // NOTE(review): CI type checking reports that "../../config" has no
    // exported member `requireOrchestratorModel` — confirm the correct
    // export name before merging.
    model: requireOrchestratorModel(),
    modelSettings: { reasoning: { effort: "high" } },
  });
  attachObservabilityHooks(agent, session.emitStatus);
  return agent;
}
11 changes: 7 additions & 4 deletions src/agent/prompts/runViewDispatcher.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ You are the **Tangle Run View Assistant**, the entry point for the AI assistant

Each specialist is exposed to you as a tool. Calling a tool runs the specialist's own sub-agent loop and returns its final response as a string.

| Tool | When to call it |
| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `ask_general_help` | Any question about Tangle concepts, features, how things work, best practices, getting started, or documentation lookups (e.g. "what is a pipeline?", "what does this component do?", "what are subgraphs?"). Not specific to the current run. |
| `ask_debug_assistant` | Any request to inspect, diagnose, or **explain the current run** — "what did this run do?", "why did it fail?", "what went wrong with run 12345?", "show me the error", "explain the outcome". Read-only: it inspects execution details, container state, and logs only. |
| Tool | When to call it |
| ------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `ask_general_help` | Any question about Tangle concepts, features, how things work, best practices, getting started, or documentation lookups (e.g. "what is a pipeline?", "what does this component do?", "what are subgraphs?"). Not specific to the current run. |
| `ask_debug_assistant` | Any request to inspect, diagnose, or **explain the current run** — "what did this run do?", "why did it fail?", "what went wrong with run 12345?", "show me the error", "explain the outcome". Read-only: it inspects execution details, container state, and logs only. |
| `create_optimization_scenario` | Any request to **optimize, tune, or improve the model/pipeline** — "how can I optimize this?", "what hyperparameters should I tune?", "where can Tangent help?", "improve this model", "find experiment ideas". Read-only: it scores the run 0-100 for ML optimization potential and proposes prioritized ideas. |

## Calling a specialist

Expand All @@ -29,6 +30,8 @@ When a specialist tool returns, **relay its response** to the user. Specialists

Preserve those links exactly — do not rewrite them as bold, italic, or backticks, and do not invent ids. Return the tool output essentially as-is; you may add at most one short framing sentence if it genuinely helps, but the content must come from the specialist. Never announce or describe the tool calls themselves to the user.

When `create_optimization_scenario` returns a fenced ` ```tangent-scenario ` code block, **relay that block verbatim** — do not unwrap it, reformat it, summarize it, or add prose around it (this overrides the one-framing-sentence allowance above). The UI renders it into interactive idea cards.

## When NOT to call a tool

- **Mutation/run requests** ("fix this", "rerun it", "change this input") — this view is read-only. Explain plainly that editing and running happen in the pipeline Editor, then offer to explain the run instead.
Expand Down
57 changes: 57 additions & 0 deletions src/agent/prompts/tangentResearcher.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Tangent Researcher — System Prompt

You are the **Tangent Researcher**, an ML optimization expert who identifies hyperparameter tuning opportunities in Shopify's Tangle ML pipelines. Score pipelines 0-100 based on optimization potential: high scores for pipelines with manual grid search, no Bayesian optimization, unexplored hyperparameter space, stale tuning, or complex architectures with many knobs.

You are **read-only** — you inspect a run, you never edit the pipeline or submit runs.

## Your Workflow

1. The run id you are analyzing is provided in your input. If the input says "the current run" / "this run" without a number, consult the **Recent runs** section appended below and pick the most recent entry.
2. Call `get_run_status(runId)` first to obtain the full run metadata and derived overall status. This is your highest-signal call — the returned `run` JSON is the basis for your scoring.
3. If you need to point at a specific task in the pipeline spec, call `get_pipeline_state` once.
4. Score the run for ML optimization potential and produce prioritized experiment ideas.

## Scoring guidance

Score 0-100 based on optimization potential. Award **high scores** for pipelines with:

- Manual grid search (no automated search strategy).
- No Bayesian optimization.
- Large, unexplored hyperparameter space.
- Stale tuning (parameters look like defaults or haven't been revisited).
- Complex architectures with many tunable knobs.

Award **low scores** for pipelines that are already well-tuned, have a narrow hyperparameter surface, or where optimization would yield little.

## Idea taxonomy

Each idea MUST include an `ideaType` tag from this enum:

- `feature_engineering`: adding/transforming/removing input features (cross-shop signals, interaction terms, embedding pooling, etc.).
- `hyperparameter_optimization`: tuning existing knobs (LR, schedule, alpha, temperature, batch size, depth).
- `input_data`: changing the training data (new label sources, sample mixes, negative mining, dataset filtering).
- `model_architecture`: structural changes (layer freezing, new heads, swapping backbones, capacity changes).

When an idea spans two types, pick the one capturing the _primary_ change.

## Response format

Respond with a single fenced code block tagged `tangent-scenario` containing ONLY the JSON object (no other prose before or after it, no other markdown). The UI recognizes this block and renders each idea as a card.

```tangent-scenario
{
"score": <integer 0-100>,
"rationale": "<2 concise sentences explaining the opportunity score>",
"summary": "<2-3 paragraph analysis: what the pipeline does, where Tangent helps, what experiments to prioritize>",
"ideas": [
{
"title": "<short idea name>",
"ideaType": "<one of: feature_engineering | hyperparameter_optimization | input_data | model_architecture>",
"impact": "high|medium|low",
"evidence": "<1 sentence from the run data>"
}
]
}
```

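Do not wrap the block in additional commentary. The entire response is the fenced block, and its contents must be valid JSON: keep every string value on a single line and write the paragraph breaks in `summary` as `\n\n` escapes, because a raw line break inside a JSON string makes the block unparseable.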
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
import { Badge } from "@/components/ui/badge";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { BlockStack, InlineStack } from "@/components/ui/layout";
import { Paragraph, Text } from "@/components/ui/typography";

/** Impact level attached to an idea; also selects the impact badge variant. */
type Impact = "high" | "medium" | "low";

/** Category tag carried by each idea. */
type IdeaType =
| "feature_engineering"
| "hyperparameter_optimization"
| "input_data"
| "model_architecture";

/** One validated idea entry, as produced by `parseIdea`. */
interface ScenarioIdea {
title: string;
ideaType: IdeaType;
impact: Impact;
evidence: string;
}

/** Validated scenario payload, as produced by `parseScenario`. */
interface Scenario {
// Rendered as `{score}/100`; this type does not constrain the range.
score: number;
rationale: string;
summary: string;
// Only the entries that passed `parseIdea` validation.
ideas: ScenarioIdea[];
}

/** Display label shown in the type badge for each idea type. */
const IDEA_TYPE_LABEL: Record<IdeaType, string> = {
feature_engineering: "Feature engineering",
hyperparameter_optimization: "Hyperparameter optimization",
input_data: "Input data",
model_architecture: "Model architecture",
};

/** Badge `variant` values used in this file. */
type BadgeVariant = "default" | "secondary" | "outline";

/** Badge variant used for each impact level. */
const IMPACT_VARIANT: Record<Impact, BadgeVariant> = {
high: "default",
medium: "secondary",
low: "outline",
};

/**
 * Type guard for "something whose properties can be read".
 *
 * Returns true for any non-null value whose `typeof` is "object"; note
 * that arrays satisfy this as well.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}

/**
 * Validates a single untyped idea entry.
 *
 * @param value - Candidate entry, typically one element of the parsed
 *   `ideas` array.
 * @returns A `ScenarioIdea` containing only the four known fields when
 *   `title` and `evidence` are strings and `ideaType` / `impact` are among
 *   the accepted literals; otherwise `null`.
 */
function parseIdea(value: unknown): ScenarioIdea | null {
  if (!isRecord(value)) return null;

  const title = value.title;
  const ideaType = value.ideaType;
  const impact = value.impact;
  const evidence = value.evidence;

  // Every check is a literal comparison so the compiler narrows each field
  // from `unknown` to its exact type inside the branch.
  if (
    typeof title === "string" &&
    (ideaType === "feature_engineering" ||
      ideaType === "hyperparameter_optimization" ||
      ideaType === "input_data" ||
      ideaType === "model_architecture") &&
    (impact === "high" || impact === "medium" || impact === "low") &&
    typeof evidence === "string"
  ) {
    return { title, ideaType, impact, evidence };
  }

  return null;
}

/**
 * Parses and validates the text of a scenario block.
 *
 * @param raw - JSON text to parse.
 * @returns The validated scenario, or `null` when `raw` is not valid JSON,
 *   is not an object, or any of `score` (number), `rationale` (string),
 *   `summary` (string) or `ideas` (array) has the wrong type. Individual
 *   idea entries that fail `parseIdea` are dropped rather than rejecting
 *   the whole scenario.
 */
function parseScenario(raw: string): Scenario | null {
  let decoded: unknown;
  try {
    decoded = JSON.parse(raw);
  } catch {
    return null;
  }
  if (!isRecord(decoded)) return null;

  const { score, rationale, summary, ideas } = decoded;
  if (
    typeof score !== "number" ||
    typeof rationale !== "string" ||
    typeof summary !== "string" ||
    !Array.isArray(ideas)
  ) {
    return null;
  }

  // Keep the valid ideas in their original order; invalid ones vanish.
  const validIdeas = ideas.flatMap((entry) => {
    const idea = parseIdea(entry);
    return idea ? [idea] : [];
  });

  return { score, rationale, summary, ideas: validIdeas };
}

/**
 * Picks the badge variant for a score: "default" at 70 and above,
 * "secondary" from 40 up to (not including) 70, "outline" otherwise.
 */
function scoreVariant(score: number): BadgeVariant {
  // `>=` comparisons are kept so a NaN score falls through to "outline".
  return score >= 70 ? "default" : score >= 40 ? "secondary" : "outline";
}

/**
 * Card for a single idea: the title, a badge with the idea type's label,
 * a badge with the impact level, and the evidence text underneath.
 */
function IdeaCard({ idea }: { idea: ScenarioIdea }) {
  const { title, ideaType, impact, evidence } = idea;
  const typeLabel = IDEA_TYPE_LABEL[ideaType];
  const impactVariant = IMPACT_VARIANT[impact];

  return (
    <Card className="gap-3 py-3">
      <CardHeader className="px-4">
        <CardTitle>
          <Text as="span" size="sm" weight="semibold">
            {title}
          </Text>
        </CardTitle>
        <InlineStack gap="1">
          <Badge variant="outline" size="sm" shape="rounded">
            {typeLabel}
          </Badge>
          <Badge variant={impactVariant} size="sm" shape="rounded">
            {impact} impact
          </Badge>
        </InlineStack>
      </CardHeader>
      <CardContent className="px-4">
        <Text as="p" size="sm" tone="subdued">
          {evidence}
        </Text>
      </CardContent>
    </Card>
  );
}

/**
 * Renders the text of a scenario block.
 *
 * When `raw` parses into a valid scenario, shows the score badge, the
 * rationale, the summary and one card per idea. When it does not, the
 * original text is shown unchanged in a paragraph.
 *
 * @param raw - Text content of the scenario block.
 */
export function TangentScenario({ raw }: { raw: string }) {
  const scenario = parseScenario(raw);

  // Unparseable payload: fall back to showing the text as received.
  if (scenario === null) {
    return (
      <Paragraph size="sm" className="my-1">
        {raw}
      </Paragraph>
    );
  }

  const { score, rationale, summary, ideas } = scenario;
  const ideaCards = ideas.map((idea, index) => (
    // Titles are not guaranteed unique, so the index is part of the key.
    <IdeaCard key={`${idea.title}-${index}`} idea={idea} />
  ));

  return (
    <BlockStack gap="3">
      <InlineStack gap="2" blockAlign="center">
        <Badge variant={scoreVariant(score)} shape="rounded">
          {score}/100
        </Badge>
        <Text as="span" size="sm" weight="semibold">
          Optimization potential
        </Text>
      </InlineStack>

      <Text as="p" size="sm" tone="subdued">
        {rationale}
      </Text>

      <Paragraph size="sm" className="whitespace-pre-line leading-relaxed">
        {summary}
      </Paragraph>

      {ideas.length > 0 && (
        <BlockStack gap="2">
          <Text as="span" size="sm" weight="semibold">
            Ideas
          </Text>
          {ideaCards}
        </BlockStack>
      )}
    </BlockStack>
  );
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,13 @@ import { hydrateComponentReference } from "@/services/componentService";

import { ComponentChip } from "./ComponentChip";
import { EntityChip } from "./EntityChip";
import { TangentScenario } from "./TangentScenario";

const ENTITY_PROTOCOL = "entity://";
const COMPONENT_PROTOCOL = "component://";

const TANGENT_SCENARIO_CLASS = "language-tangent-scenario";

const INLINE_CODE_CLASS = "rounded bg-muted px-1 py-0.5 text-xs font-mono";

const ComponentRefsContext = createContext<
Expand Down Expand Up @@ -114,6 +117,11 @@ function MarkdownCode({
node: _node,
...rest
}: ComponentProps<"code"> & { node?: unknown }) {
if (className?.includes(TANGENT_SCENARIO_CLASS)) {
const raw = String(children).replace(/\n$/, "");
return <TangentScenario raw={raw} />;
}

const match = className?.match(/language-(\w+)/);

if (match) {
Expand Down
Loading