TangleML · maxy-shpfy · Jun 3, 2026
@@ -0,0 +1,36 @@
+import { useMutation } from "@tanstack/react-query";
+
+import useToastNotification from "@/hooks/useToastNotification";
+import type { ScenarioEntry } from "@/routes/tangent/idb/tangentDb";
+import {
+  createOpencodeSession,
+  resolveInstanceId,
+  sendAutoresearchMessage,
+} from "@/routes/tangent/services/autoresearchOpencode";
+import { buildAutoresearchPrompt } from "@/routes/tangent/services/autoresearchPrompt";
+
+/**
+ * React Query mutation hook that starts automated research for a scenario on
+ * a Tangent OpenCode agent.
+ *
+ * The mutation takes a `ScenarioEntry` and, in order:
+ * 1. resolves the instance to use via `resolveInstanceId`,
+ * 2. opens a fresh session titled `Autoresearch: <plan name>`,
+ * 3. builds the autoresearch prompt and sends it to that session.
+ *
+ * A toast is shown on success and on failure; the failure toast carries the
+ * error's message.
+ *
+ * @returns The mutation object produced by `useMutation`.
+ */
+export function useRunAutomatedResearch() {
+  const notify = useToastNotification();
+
+  return useMutation({
+    mutationFn: async (scenario: ScenarioEntry) => {
+      const instanceId = await resolveInstanceId();
+      const sessionId = await createOpencodeSession(
+        instanceId,
+        `Autoresearch: ${scenario.plan.name}`,
+      );
+      const prompt = buildAutoresearchPrompt(scenario);
+      await sendAutoresearchMessage(instanceId, sessionId, prompt);
+    },
+    onSuccess: () => {
+      notify("Automated research started", "success");
+    },
+    onError: (error) => {
+      // Interpolating an Error directly renders "Error: <message>", which is
+      // redundant after the "Failed to start…" prefix; show the message only.
+      const reason = error instanceof Error ? error.message : String(error);
+      notify(`Failed to start automated research: ${reason}`, "error");
+    },
+  });
+}
@@ -0,0 +1,112 @@
+import { client } from "@/api/client.gen";
+import {
+  createInstanceApiTangentInstancesPost,
+  listInstancesApiTangentInstancesGet,
+} from "@/api/sdk.gen";
+
+/**
+ * Workspace directory the OpenCode agent runs in. OpenCode scopes sessions to
+ * a directory, which the helpers in this file pass as the `directory` query
+ * param. The same path appears base64-encoded (`L3Jvb3Qvd29ya3NwYWNl`) in the
+ * web UI URL.
+ */
+const OPENCODE_WORKSPACE_DIR = "/root/workspace";
+
+/**
+ * Type guard for "non-null object" values, letting callers read arbitrary
+ * string keys as `unknown`. Arrays also pass, since they are objects.
+ */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  if (value === null) {
+    return false;
+  }
+  return typeof value === "object";
+}
+
+/** Optional extras accepted by `opencodeProxyPost`. */
+interface OpencodeProxyPostOptions {
+  /** Query-string parameters, handed to the client as `query`. */
+  query?: Record<string, string>;
+  /** Request payload, handed to the client as `body`. */
+  body?: unknown;
+}
+
+/**
+ * POST through the backend OpenCode reverse proxy
+ * (`/api/tangent/instances/{instanceId}/opencode/api/{path}`), which forwards
+ * the path, query string, and body to the in-pod OpenCode server.
+ *
+ * The hey-api client is called directly rather than through the generated
+ * proxy function so the `{path}` segment keeps its slashes (the generated
+ * function would URL-encode them) and so a query and body can be attached.
+ *
+ * @param instanceId - Tangent instance whose OpenCode server receives the call.
+ * @param path - OpenCode API path below the proxy root; inserted verbatim.
+ * @param options - Optional query parameters and request body.
+ * @returns The response payload exactly as the client returned it (unvalidated).
+ * @throws Error when the client reports an error for the request.
+ */
+async function opencodeProxyPost(
+  instanceId: string,
+  path: string,
+  { query, body }: OpencodeProxyPostOptions = {},
+): Promise<unknown> {
+  const { data, error } = await client.post({
+    url: `/api/tangent/instances/${instanceId}/opencode/api/${path}`,
+    query,
+    body,
+  });
+
+  if (error) {
+    // An Error instance has no enumerable own properties, so JSON.stringify
+    // would collapse it to "{}" and hide the reason; use its message instead.
+    const detail =
+      error instanceof Error ? error.message : JSON.stringify(error);
+    throw new Error(`OpenCode request failed (${path}): ${detail}`);
+  }
+
+  return data;
+}
+
+/**
+ * Choose the Tangent OpenCode instance to talk to, intended to mirror the
+ * backend `/api/tangent/go` logic: reuse an existing instance when there is
+ * one, otherwise create a new one.
+ *
+ * When several instances exist, the one whose id comes first in default sort
+ * order is used.
+ * NOTE(review): "earliest" therefore means "lowest id", not creation time —
+ * confirm this matches the backend.
+ *
+ * @returns The id of the reused or newly created instance.
+ * @throws Error when listing fails, or when creation fails or returns no data.
+ */
+export async function resolveInstanceId(): Promise<string> {
+  const listed = await listInstancesApiTangentInstancesGet();
+  if (listed.error) {
+    const detail = JSON.stringify(listed.error);
+    throw new Error(`Failed to list Tangent instances: ${detail}`);
+  }
+
+  const existingIds = (listed.data?.instances ?? []).map(
+    ({ instance_id }) => instance_id,
+  );
+  existingIds.sort();
+  if (existingIds.length > 0) {
+    return existingIds[0];
+  }
+
+  // Nothing to reuse: provision a new instance.
+  const { data: newInstance, error: createError } =
+    await createInstanceApiTangentInstancesPost();
+  if (createError || !newInstance) {
+    const detail = JSON.stringify(createError);
+    throw new Error(`Failed to create Tangent instance: ${detail}`);
+  }
+  return newInstance.instance_id;
+}
+
+/**
+ * Open a new OpenCode session scoped to the workspace directory.
+ *
+ * @param instanceId - Tangent instance to create the session on.
+ * @param title - Title sent in the request body.
+ * @returns The `id` field of the session response.
+ * @throws Error when the proxy request fails, or when the response is not an
+ *   object carrying a string `id`.
+ */
+export async function createOpencodeSession(
+  instanceId: string,
+  title: string,
+): Promise<string> {
+  const response = await opencodeProxyPost(instanceId, "session", {
+    query: { directory: OPENCODE_WORKSPACE_DIR },
+    body: { title },
+  });
+
+  // The proxy returns `unknown`, so check the shape before trusting it.
+  if (isRecord(response) && typeof response.id === "string") {
+    return response.id;
+  }
+  throw new Error("OpenCode session response did not include an id");
+}
+
+/**
+ * Post a text prompt to an existing OpenCode session through the
+ * `prompt_async` endpoint, so the agent's run is not awaited here
+ * (fire-and-forget). The promise resolves once the proxy POST itself returns.
+ *
+ * @param instanceId - Tangent instance hosting the session.
+ * @param sessionId - Session to send the prompt to.
+ * @param text - Prompt text, sent as a single `text` part.
+ * @throws Error when the proxy request fails.
+ */
+export async function sendAutoresearchMessage(
+  instanceId: string,
+  sessionId: string,
+  text: string,
+): Promise<void> {
+  const promptPath = `session/${sessionId}/prompt_async`;
+  const textPart = { type: "text", text };
+
+  await opencodeProxyPost(instanceId, promptPath, {
+    query: { directory: OPENCODE_WORKSPACE_DIR },
+    body: { parts: [textPart] },
+  });
+}
@@ -0,0 +1,157 @@
+/**
+ * This file is a PoC for the autoresearch prompt.
+ */
+import yaml from "js-yaml";
+
+import type {
+  ScenarioEntry,
+  ScenarioIdea,
+} from "@/routes/tangent/idb/tangentDb";
+
+/**
+ * Placeholder written into the synthesized scenario YAML for path fields the
+ * saved plan does not supply.
+ */
+const UNVERIFIED = "UNVERIFIED - resolve from baseline run before submitting";
+
+/** Subset of the legacy prompt-builder `p` object, derived from a scenario. */
+interface AutoresearchParams {
+  /** Scenario id; used for the GCS path and as the canonical scenario name. */
+  id: string;
+  /** Baseline run id the prompt asks the agent to fork from. */
+  runId: string;
+  /** Candidate ideas; the first three are listed in the initial MEMORY.md. */
+  ideas: ScenarioIdea[];
+  /** Metric name for the MEMORY.md baseline line; omitted means "unknown". */
+  metric?: string;
+  /** Baseline value shown next to `metric`; omitted means "unknown". */
+  metricBaseline?: string;
+}
+
+/**
+ * Render a minimal `scenario.yaml` document from the saved scenario plan.
+ *
+ * `name`, `description`, `pipeline`, `metrics` and `search_space` are always
+ * emitted. Where the plan lacks `pipeline` or `metrics`, explicit UNVERIFIED
+ * placeholders are written so the agent's pre-flight checks resolve them; a
+ * missing `search_space` becomes an empty mapping. The remaining sections are
+ * included only when the plan provides them.
+ *
+ * @param scenario - Saved scenario whose plan (and baseline run id) is used.
+ * @returns The YAML text, with keys in insertion order.
+ */
+function buildScenarioYaml(scenario: ScenarioEntry): string {
+  const { plan } = scenario;
+
+  const pipelineFallback = () => ({
+    path: UNVERIFIED,
+    baseline_run_id: scenario.run.runId,
+  });
+
+  const doc: Record<string, unknown> = {
+    name: plan.name,
+    description: plan.description,
+    pipeline: plan.pipeline ?? pipelineFallback(),
+    metrics: plan.metrics ?? { target: { path: UNVERIFIED } },
+    search_space: plan.search_space ?? {},
+  };
+
+  // Optional sections, appended in this order when present on the plan.
+  const optionalSections = [
+    "experiment_actions",
+    "research",
+    "budget",
+    "timing",
+    "failure_playbook",
+  ] as const;
+  for (const section of optionalSections) {
+    const value = plan[section];
+    if (value) {
+      doc[section] = value;
+    }
+  }
+
+  return yaml.dump(doc, { sortKeys: false });
+}
+
+/**
+ * Produce the starting MEMORY.md text for a scenario that has not run yet.
+ *
+ * The baseline line names the metric when one is given (value falls back to
+ * "unknown"). The hypotheses section lists at most the first three ideas, or
+ * a note that the researcher will generate them.
+ *
+ * @param p - Scenario-derived parameters.
+ * @returns The markdown document, ending with a newline.
+ */
+function initialMemoryMd(p: AutoresearchParams): string {
+  let baselineLine = "Baseline metric: unknown";
+  if (p.metric) {
+    baselineLine = `Baseline ${p.metric}: ${p.metricBaseline ?? "unknown"}`;
+  }
+
+  let hypotheses = "No ideas yet — researcher will generate in Step 1.";
+  if (p.ideas.length > 0) {
+    const topIdeas = p.ideas.slice(0, 3);
+    hypotheses = topIdeas
+      .map((idea, index) => `${index + 1}. ${idea.title} [${idea.impact}]`)
+      .join("\n");
+  }
+
+  const lines = [
+    "# MEMORY.md",
+    "",
+    "## Best Config",
+    "No experiments run yet — round 1 has not completed.",
+    baselineLine,
+    "",
+    "## Key Lessons",
+    "- Starting fresh. Researcher will analyze baseline metrics in Step 1.",
+    "",
+    "## Top Hypotheses to Test",
+    hypotheses,
+    "",
+    "## Active Runs",
+    "(none yet — will be filled after round 1 submission)",
+    "",
+    "## Experiment Log",
+    "(none)",
+    "", // keeps the trailing newline
+  ];
+  return lines.join("\n");
+}
+
+/** Optional knobs for `buildAutoresearchPrompt`. */
+interface BuildPromptOptions {
+  /** Total rounds planned across sessions, quoted in the prompt (default 3). */
+  rounds?: number;
+  /**
+   * Whether the prompt tells the agent to auto-approve the first hypothesis
+   * (default true) or to pause for approval.
+   */
+  autoApprove?: boolean;
+}
+
+/**
+ * Build the autoresearch prompt for a scenario, ported from the legacy
+ * `generateRiverMessage`. The agent is asked to run `tangent auto` for one
+ * round in this session and sync state to GCS.
+ *
+ * The prompt embeds a synthesized `scenario.yaml` (with its `name` forced to
+ * the scenario id) and an initial MEMORY.md. It also includes
+ * `window.location.href`, so it must be called where `window` exists.
+ *
+ * @param scenario - Saved scenario to build the prompt for.
+ * @param options - `rounds` is only quoted in the text as the total planned;
+ *   `autoApprove` selects which approval instruction is included.
+ * @returns The full prompt text.
+ */
+export function buildAutoresearchPrompt(
+  scenario: ScenarioEntry,
+  { rounds = 3, autoApprove = true }: BuildPromptOptions = {},
+): string {
+  const p: AutoresearchParams = {
+    id: scenario.id,
+    runId: scenario.run.runId,
+    ideas: scenario.ideas,
+  };
+
+  // todo: remove hardcoded GCS path
+  const gcsPath = `gs://shopify-discovery-relevance/tangent/scenarios/${p.id}/`;
+  const approveNote = autoApprove
+    ? "Auto-approve the first hypothesis — use the top researcher suggestion without waiting for human input."
+    : "Pause for hypothesis approval before submitting runs.";
+  const memory = initialMemoryMd(p).trim();
+
+  // Ensure the scenario.yaml name matches the pipeline ID to avoid ambiguity.
+  // A replacer function is used so the id is inserted literally: in a
+  // replacement *string*, sequences such as `$&`, `$1` or `$$` inside the id
+  // would be expanded as replacement patterns.
+  const yamlFixed = buildScenarioYaml(scenario)
+    .trim()
+    .replace(
+      /^(name:\s*)(.+)/m,
+      (_match: string, key: string) => `${key}${p.id}`,
+    );
+
+  const currentLocation = window.location.href;
+
+  return `Please run \`tangent auto\` for the following scenario — **1 round in this session** (${rounds} total planned across sessions).
+This is a multi-session workflow: run round 1, sync state to GCS, then hand off cleanly. The next session resumes from "Active Runs" in MEMORY.md.
+${approveNote}
+
+**Current URL:** \`${currentLocation}\`
+**Baseline run ID:** \`${p.runId}\`
+**Scenario ID (canonical):** \`${p.id}\`
+**GCS output path:** \`${gcsPath}\`
+Fork the pipeline from the baseline run — do not touch the mainline. After round 1 completes, write MEMORY.md, sessions/, and logs/ to the GCS path above.
+
+**Pre-flight checks (resolve before submitting any runs):**
+- [ ] All \`search_space[*].current\` values match the actual baseline run config (not guessed)
+- [ ] All \`search_space[*].range\` brackets extend at or above the baseline value for quality-lift experiments
+- [ ] All categorical \`choices\` are confirmed against source code validation (no guessed enum values)
+- [ ] \`metrics.target.path\` key exists in a downloaded baseline metrics artifact
+- [ ] Any UNVERIFIED fields in the YAML are resolved or explicitly noted in MEMORY.md before runs are submitted
+
+**Auto-approvals — no need to confirm these with me:**
+- Fix \`score_transform\` (or any categorical) choices to match what the pipeline code actually accepts.
+- If baseline HPs fall outside declared search space ranges, widen the ranges to include the baseline.
+- Resolve all metric paths from the latest baseline run artifact — override any placeholder names in the YAML.
+- If the primary metric is monotonically dominated by a boundary value, apply option B: add a hard floor at 10% above the lower bound and continue.
+- Submit sentinel immediately without waiting for confirmation.
+- Stage round 1 immediately after sentinel submission — do not wait for it to complete before planning.
+- Hold and ask only if: pipeline export fails, auth is broken, sentinel metric deviates >5% from stated baseline, or a guard metric key is missing from the artifact entirely.
+
+---
+**scenario.yaml:**
+\`\`\`yaml
+${yamlFixed}
+\`\`\`
+
+---
+**MEMORY.md:**
+\`\`\`
+${memory}
+\`\`\``;
+}