Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions src/routes/tangent/hooks/useRunAutomatedResearch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { useMutation } from "@tanstack/react-query";

import useToastNotification from "@/hooks/useToastNotification";
import type { ScenarioEntry } from "@/routes/tangent/idb/tangentDb";
import {
createOpencodeSession,
resolveInstanceId,
sendAutoresearchMessage,
} from "@/routes/tangent/services/autoresearchOpencode";
import { buildAutoresearchPrompt } from "@/routes/tangent/services/autoresearchPrompt";

/**
 * React Query mutation hook that starts automated research for a scenario.
 *
 * Running the mutation performs three sequential steps: obtain an instance id
 * via `resolveInstanceId`, create a new OpenCode session titled after the
 * scenario's plan name, and send the generated autoresearch prompt to that
 * session. A toast is shown on success and on failure.
 *
 * @returns The `useMutation` result; call `mutate(scenario)` to start a run.
 */
export function useRunAutomatedResearch() {
  const showToast = useToastNotification();

  // Each step depends on the previous one's result, so they cannot run in
  // parallel.
  async function startResearch(scenario: ScenarioEntry): Promise<void> {
    const targetInstance = await resolveInstanceId();
    const sessionTitle = `Autoresearch: ${scenario.plan.name}`;
    const freshSession = await createOpencodeSession(
      targetInstance,
      sessionTitle,
    );
    const message = buildAutoresearchPrompt(scenario);
    await sendAutoresearchMessage(targetInstance, freshSession, message);
  }

  return useMutation({
    mutationFn: startResearch,
    onSuccess: () => {
      showToast("Automated research started", "success");
    },
    onError: (failure) => {
      showToast(`Failed to start automated research: ${failure}`, "error");
    },
  });
}
112 changes: 112 additions & 0 deletions src/routes/tangent/services/autoresearchOpencode.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import { client } from "@/api/client.gen";
import {
createInstanceApiTangentInstancesPost,
listInstancesApiTangentInstancesGet,
} from "@/api/sdk.gen";

/**
* Workspace directory the OpenCode agent runs in. OpenCode scopes sessions to
* a directory, passed as the `directory` query param (base64 `L3Jvb3Qvd29ya3NwYWNl`
* in the web UI URL).
*
* Sent as the `directory` query value on both the session-creation and the
* prompt requests in this module.
*/
const OPENCODE_WORKSPACE_DIR = "/root/workspace";

/**
 * Type guard for "some non-null object".
 *
 * Returns true for any value whose `typeof` is `"object"` except `null`
 * (so arrays pass as well); primitives, `undefined`, and functions fail.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}

/** Optional request parts accepted by `opencodeProxyPost`. */
interface OpencodeProxyPostOptions {
/** Query-string parameters, handed to the client as `query`. */
query?: Record<string, string>;
/** Request payload, handed to the client as `body`. */
body?: unknown;
}

/**
 * POST through the backend OpenCode reverse proxy
 * (`/api/tangent/instances/{instanceId}/opencode/api/{path}`), which forwards
 * the path, query string, and body to the in-pod OpenCode server.
 *
 * We call the hey-api client directly rather than the generated proxy fn so the
 * `{path}` segment keeps its slashes (the generated fn would URL-encode them)
 * and so we can attach a query and body.
 *
 * @param instanceId - Tangent instance whose proxy is targeted.
 * @param path - OpenCode API path relative to the proxy root; inserted into
 *   the URL verbatim, so it may contain slashes.
 * @param options - Optional `query` parameters and `body` payload.
 * @returns The `data` value reported by the client (shape not validated here).
 * @throws Error when the client reports an `error`; the message includes the
 *   path and a description of the underlying failure.
 */
async function opencodeProxyPost(
  instanceId: string,
  path: string,
  { query, body }: OpencodeProxyPostOptions = {},
): Promise<unknown> {
  const { data, error } = await client.post({
    url: `/api/tangent/instances/${instanceId}/opencode/api/${path}`,
    query,
    body,
  });

  if (error) {
    // JSON.stringify on an Error instance yields "{}" because `message` and
    // `stack` are non-enumerable, so surface the message explicitly and keep
    // the JSON form for plain error payloads.
    const cause: unknown = error;
    const detail =
      cause instanceof Error ? cause.message : JSON.stringify(cause);
    throw new Error(`OpenCode request failed (${path}): ${detail}`);
  }

  return data;
}

/**
 * Resolve a Tangent OpenCode instance to talk to, mirroring the backend
 * `/api/tangent/go` logic: reuse an existing instance, otherwise create one.
 *
 * NOTE(review): the reused instance is the lexicographically smallest
 * `instance_id`. That equals the "earliest" instance only if ids sort by
 * creation time — confirm against the backend.
 *
 * @returns The id of the reused or newly created instance.
 * @throws Error when listing fails, when creation fails, or when the creation
 *   response carries no data.
 */
export async function resolveInstanceId(): Promise<string> {
  // JSON.stringify on an Error instance yields "{}", hiding the cause, so
  // prefer the message for Error values and JSON for plain payloads.
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : JSON.stringify(err);

  const { data, error } = await listInstancesApiTangentInstancesGet();
  if (error) {
    throw new Error(
      `Failed to list Tangent instances: ${describeError(error)}`,
    );
  }

  // Destructure instead of indexing so the "no instances" case is an explicit
  // undefined check (also type-safe under noUncheckedIndexedAccess).
  const [firstInstanceId] = (data?.instances ?? [])
    .map((instance) => instance.instance_id)
    .sort();
  if (firstInstanceId !== undefined) {
    return firstInstanceId;
  }

  const created = await createInstanceApiTangentInstancesPost();
  if (created.error) {
    throw new Error(
      `Failed to create Tangent instance: ${describeError(created.error)}`,
    );
  }
  if (!created.data) {
    // Previously reported as "...: undefined", which said nothing useful.
    throw new Error(
      "Failed to create Tangent instance: response contained no data",
    );
  }
  return created.data.instance_id;
}

/**
 * Open a new OpenCode session scoped to the workspace directory.
 *
 * @param instanceId - Tangent instance to create the session on.
 * @param title - Title sent in the request body.
 * @returns The `id` string from the session response.
 * @throws Error when the response is not an object with a string `id`.
 */
export async function createOpencodeSession(
  instanceId: string,
  title: string,
): Promise<string> {
  const requestOptions = {
    query: { directory: OPENCODE_WORKSPACE_DIR },
    body: { title },
  };
  const response = await opencodeProxyPost(
    instanceId,
    "session",
    requestOptions,
  );

  // The proxy returns `unknown`, so validate the shape before trusting it.
  if (isRecord(response) && typeof response.id === "string") {
    return response.id;
  }
  throw new Error("OpenCode session response did not include an id");
}

/**
 * Post a text prompt to an existing OpenCode session through the session's
 * `prompt_async` endpoint (fire-and-forget: this does not wait for the agent
 * to finish, only for the request itself).
 *
 * @param instanceId - Tangent instance hosting the session.
 * @param sessionId - Session that receives the prompt.
 * @param text - Prompt text, sent as a single `text` part.
 */
export async function sendAutoresearchMessage(
  instanceId: string,
  sessionId: string,
  text: string,
): Promise<void> {
  const endpoint = `session/${sessionId}/prompt_async`;
  const textPart = { type: "text", text };

  await opencodeProxyPost(instanceId, endpoint, {
    query: { directory: OPENCODE_WORKSPACE_DIR },
    body: { parts: [textPart] },
  });
}
157 changes: 157 additions & 0 deletions src/routes/tangent/services/autoresearchPrompt.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
/**
* This file is a PoC for the autoresearch prompt.
*/
import yaml from "js-yaml";

import type {
ScenarioEntry,
ScenarioIdea,
} from "@/routes/tangent/idb/tangentDb";

/**
* Placeholder value written into the synthesized scenario YAML for fields the
* saved plan does not supply (pipeline path, target metric path).
*/
const UNVERIFIED = "UNVERIFIED - resolve from baseline run before submitting";

/** Subset of the legacy prompt-builder `p` object, derived from a scenario. */
interface AutoresearchParams {
/** Scenario id; used in the GCS path and as the canonical scenario id. */
id: string;
/** Baseline run id, taken from `scenario.run.runId`. */
runId: string;
/** Scenario ideas; the first three seed the MEMORY.md hypotheses list. */
ideas: ScenarioIdea[];
/** Metric name for the MEMORY.md baseline line; when absent the line reads "unknown". */
metric?: string;
/** Baseline value shown next to `metric`; falls back to "unknown" when absent. */
metricBaseline?: string;
}

/**
 * Render a minimal `scenario.yaml` document from a saved scenario.
 *
 * `name` and `description` are always copied from the plan. `pipeline`,
 * `metrics`, and `search_space` fall back to defaults when the plan lacks
 * them (the pipeline and metric paths become UNVERIFIED placeholders so the
 * agent's pre-flight checks resolve them). The remaining sections are
 * included only when the plan provides a truthy value.
 *
 * @param scenario - Saved scenario whose plan and baseline run id are used.
 * @returns The YAML text, with keys in insertion order.
 */
function buildScenarioYaml(scenario: ScenarioEntry): string {
  const { plan } = scenario;

  const fallbackPipeline = () => ({
    path: UNVERIFIED,
    baseline_run_id: scenario.run.runId,
  });

  const doc: Record<string, unknown> = {
    name: plan.name,
    description: plan.description,
    pipeline: plan.pipeline ?? fallbackPipeline(),
    metrics: plan.metrics ?? { target: { path: UNVERIFIED } },
    search_space: plan.search_space ?? {},
  };

  // Order here is the order the sections appear in the emitted YAML.
  const optionalSections = [
    "experiment_actions",
    "research",
    "budget",
    "timing",
    "failure_playbook",
  ] as const;
  for (const section of optionalSections) {
    const value = plan[section];
    if (value) {
      doc[section] = value;
    }
  }

  return yaml.dump(doc, { sortKeys: false });
}

/**
 * Produce the starting MEMORY.md text for a scenario with no completed rounds.
 *
 * @param p - Prompt parameters; `metric`/`metricBaseline` feed the baseline
 *   line and at most the first three `ideas` feed the hypotheses list.
 * @returns The MEMORY.md markdown, ending with a trailing newline.
 */
function initialMemoryMd(p: AutoresearchParams): string {
  // Baseline line: generic unless a metric name is known.
  let metricLine = "Baseline metric: unknown";
  if (p.metric) {
    metricLine = `Baseline ${p.metric}: ${p.metricBaseline ?? "unknown"}`;
  }

  // Hypotheses: numbered list of the top three ideas, or a placeholder.
  let ideasSection = "No ideas yet — researcher will generate in Step 1.";
  if (p.ideas.length > 0) {
    const numbered: string[] = [];
    p.ideas.slice(0, 3).forEach((idea, index) => {
      numbered.push(`${index + 1}. ${idea.title} [${idea.impact}]`);
    });
    ideasSection = numbered.join("\n");
  }

  return `# MEMORY.md

## Best Config
No experiments run yet — round 1 has not completed.
${metricLine}

## Key Lessons
- Starting fresh. Researcher will analyze baseline metrics in Step 1.

## Top Hypotheses to Test
${ideasSection}

## Active Runs
(none yet — will be filled after round 1 submission)

## Experiment Log
(none)
`;
}

/** Optional knobs for `buildAutoresearchPrompt`. */
interface BuildPromptOptions {
/** Total rounds planned across sessions, quoted in the prompt text (default 3). */
rounds?: number;
/** Selects the auto-approve note when true, the pause-for-approval note when false (default true). */
autoApprove?: boolean;
}

/**
 * Build the autoresearch prompt for a scenario, ported from the legacy
 * `generateRiverMessage`. The agent is asked to run `tangent auto` for one round
 * and sync state to GCS.
 *
 * The prompt embeds a synthesized `scenario.yaml` (with `name` forced to the
 * scenario id) and an initial MEMORY.md. It also embeds the current page URL,
 * so this must run in a browser context where `window` exists.
 *
 * @param scenario - Saved scenario providing the id, baseline run, ideas, and plan.
 * @param options - `rounds` is the total planned round count quoted in the
 *   prompt (default 3); `autoApprove` selects the approval note (default true).
 * @returns The complete prompt text.
 */
export function buildAutoresearchPrompt(
  scenario: ScenarioEntry,
  { rounds = 3, autoApprove = true }: BuildPromptOptions = {},
): string {
  const p: AutoresearchParams = {
    id: scenario.id,
    runId: scenario.run.runId,
    ideas: scenario.ideas,
  };

  // todo: remove hardcoded GCS path
  const gcsPath = `gs://shopify-discovery-relevance/tangent/scenarios/${p.id}/`;
  const approveNote = autoApprove
    ? "Auto-approve the first hypothesis — use the top researcher suggestion without waiting for human input."
    : "Pause for hypothesis approval before submitting runs.";
  const memory = initialMemoryMd(p).trim();

  // Ensure the scenario.yaml name matches the pipeline ID to avoid ambiguity.
  // The name is overridden BEFORE serialization rather than regex-patched
  // afterwards: patching the dumped text inserted the id without YAML quoting,
  // treated `$` sequences in the id as replacement patterns, and corrupted the
  // document when the original name was emitted as a folded/block scalar
  // (long or multi-line names), leaving orphaned continuation lines.
  const yamlFixed = buildScenarioYaml({
    ...scenario,
    plan: { ...scenario.plan, name: p.id },
  }).trim();

  const currentLocation = window.location.href;

  return `Please run \`tangent auto\` for the following scenario — **1 round in this session** (${rounds} total planned across sessions).
This is a multi-session workflow: run round 1, sync state to GCS, then hand off cleanly. The next session resumes from "Active Runs" in MEMORY.md.
${approveNote}

**Current URL:** \`${currentLocation}\`
**Baseline run ID:** \`${p.runId}\`
**Scenario ID (canonical):** \`${p.id}\`
**GCS output path:** \`${gcsPath}\`
Fork the pipeline from the baseline run — do not touch the mainline. After round 1 completes, write MEMORY.md, sessions/, and logs/ to the GCS path above.

**Pre-flight checks (resolve before submitting any runs):**
- [ ] All \`search_space[*].current\` values match the actual baseline run config (not guessed)
- [ ] All \`search_space[*].range\` brackets extend at or above the baseline value for quality-lift experiments
- [ ] All categorical \`choices\` are confirmed against source code validation (no guessed enum values)
- [ ] \`metrics.target.path\` key exists in a downloaded baseline metrics artifact
- [ ] Any UNVERIFIED fields in the YAML are resolved or explicitly noted in MEMORY.md before runs are submitted

**Auto-approvals — no need to confirm these with me:**
- Fix \`score_transform\` (or any categorical) choices to match what the pipeline code actually accepts.
- If baseline HPs fall outside declared search space ranges, widen the ranges to include the baseline.
- Resolve all metric paths from the latest baseline run artifact — override any placeholder names in the YAML.
- If the primary metric is monotonically dominated by a boundary value, apply option B: add a hard floor at 10% above the lower bound and continue.
- Submit sentinel immediately without waiting for confirmation.
- Stage round 1 immediately after sentinel submission — do not wait for it to complete before planning.
- Hold and ask only if: pipeline export fails, auth is broken, sentinel metric deviates >5% from stated baseline, or a guard metric key is missing from the artifact entirely.

---
**scenario.yaml:**
\`\`\`yaml
${yamlFixed}
\`\`\`

---
**MEMORY.md:**
\`\`\`
${memory}
\`\`\``;
}
Loading
Loading