diff --git a/apps/web/src/content/docs/docs/evaluation/rubrics.mdx b/apps/web/src/content/docs/docs/evaluation/rubrics.mdx index 4f9f5587e..14759e797 100644 --- a/apps/web/src/content/docs/docs/evaluation/rubrics.mdx +++ b/apps/web/src/content/docs/docs/evaluation/rubrics.mdx @@ -122,6 +122,23 @@ score = sum(criterion_score / 10 * weight) / sum(total_weights) Write rubric criteria directly in `assertions`. If you want help choosing between plain assertions, deterministic graders, and rubric or LLM-based grading, use the `agentv-eval-writer` skill. Keep the grader choice driven by the criteria rather than one fixed recipe. +## Context Available to Rubric Graders + +Rubric assertions automatically receive the full evaluation context, not just the agent's text answer. When present, the following are appended to the grader prompt: + +- **`file_changes`** — unified diff of workspace file changes (when `workspace` is configured) +- **`tool_calls`** — formatted summary of tool calls from agent execution (tool name + key inputs) + +This means rubric criteria can reason about *what the agent did*, not only what it said. For example, you can check whether an agent invoked a specific skill: + +```yaml +assertions: + - The agent invoked the acme-deploy skill + - The agent used Read to inspect the config file before editing +``` + +This is a lightweight alternative to the `skill-trigger` evaluator when you want to check tool usage with natural-language criteria. 
+ ## Combining with Other Graders Rubrics work alongside code and LLM graders: diff --git a/apps/web/src/content/docs/docs/graders/llm-graders.mdx b/apps/web/src/content/docs/docs/graders/llm-graders.mdx index 3f9cd969c..14f88ba6a 100644 --- a/apps/web/src/content/docs/docs/graders/llm-graders.mdx +++ b/apps/web/src/content/docs/docs/graders/llm-graders.mdx @@ -73,6 +73,7 @@ Score the response from 0.0 to 1.0 based on: | `expected_output` | Full resolved expected array, JSON-serialized | | `output` | Full provider output array, JSON-serialized | | `file_changes` | Unified diff of workspace file changes (populated when `workspace` is configured) | +| `tool_calls` | Formatted summary of tool calls from agent execution (tool name + key inputs per call) | ## Per-Grader Target @@ -228,6 +229,7 @@ Derived strings injected into grader prompts: | `expected_output` | Full resolved expected array, JSON-serialized | | `output` | Full provider output array, JSON-serialized | | `file_changes` | Unified diff of workspace file changes (populated when `workspace` is configured) | +| `tool_calls` | Formatted summary of tool calls from agent execution (tool name + key inputs per call) | **Example flow:** diff --git a/examples/features/tool-calls-template/evals/eval.yaml b/examples/features/tool-calls-template/evals/eval.yaml new file mode 100644 index 000000000..18976db70 --- /dev/null +++ b/examples/features/tool-calls-template/evals/eval.yaml @@ -0,0 +1,39 @@ +# Tool Calls Template Variable Demo +# +# Demonstrates using {{ tool_calls }} with rubric assertions to check +# whether an agent invoked the right skills — without needing the +# skill-trigger evaluator. +# +# Skills live in workspace/.agents/skills/. The before_all hook copies +# them to .claude/skills/ so copilot and other providers can discover them. 
+# +# Run: +# bun agentv eval examples/features/tool-calls-template/evals/eval.yaml --target copilot + +name: tool-calls-template +description: Rubric assertions with {{ tool_calls }} for skill verification + +workspace: + template: ../workspace/ + hooks: + before_all: + command: + - bash + - -c + - 'WS=$(python3 -c "import json,sys;print(json.load(sys.stdin)[\"workspace_path\"])") && mkdir -p "$WS/.claude" && cp -r "$WS/.agents/skills" "$WS/.claude/skills"' + +tests: + - id: deploy-skill-triggered + input: How do I deploy payments-api to production? + assertions: + - The agent invoked the acme-deploy skill + + - id: rollback-skill-triggered + input: I need to roll back user-service in staging, what's the procedure? + assertions: + - The agent invoked the acme-deploy skill + + - id: no-skill-for-unrelated + input: Write a Python function that parses JSON logs and extracts error messages. + assertions: + - The tool_calls section does not contain any entry starting with "Skill:" (file creation, Read, Edit, and Bash are fine) diff --git a/examples/features/tool-calls-template/workspace/.agents/skills/acme-deploy/SKILL.md b/examples/features/tool-calls-template/workspace/.agents/skills/acme-deploy/SKILL.md new file mode 100644 index 000000000..7ab31f686 --- /dev/null +++ b/examples/features/tool-calls-template/workspace/.agents/skills/acme-deploy/SKILL.md @@ -0,0 +1,49 @@ +--- +name: acme-deploy +description: Use when the user asks about deploying services, checking deployment status, rollback procedures, or release management at Acme Corp +--- + +# Acme Corp Deployment Procedures + +## Overview + +Internal deployment runbook for Acme Corp services. All deployments follow the Trident release pipeline. 
+ +## Deployment Commands + +### Deploy to staging +```bash +trident push --env staging --service <service> --tag <version> +``` + +### Promote to production +```bash +trident promote --from staging --to prod --service <service> --approval-ticket <ticket> +``` +Production deploys require a JIRA approval ticket (prefix: DEPLOY-). + +### Rollback +```bash +trident rollback --env <env> --service <service> --to-version <version> +``` +Rollbacks auto-notify #ops-alerts in Slack. + +### Check deployment status +```bash +trident status --env <env> --service <service> +``` + +## Service Registry + +| Service | Owner Team | Staging URL | Prod URL | +|---------|-----------|-------------|----------| +| payments-api | Platform | payments.staging.acme.internal | payments.acme.internal | +| user-service | Identity | users.staging.acme.internal | users.acme.internal | +| notifications | Engagement | notify.staging.acme.internal | notify.acme.internal | + +## Rules + +- All prod deploys require a DEPLOY- JIRA ticket +- Staging deploys are auto-approved during business hours (9am-5pm PT) +- Rollbacks bypass approval but require post-mortem within 48h +- Deploy freezes are announced in #engineering-announcements diff --git a/packages/core/src/evaluation/graders/format-tool-calls.ts b/packages/core/src/evaluation/graders/format-tool-calls.ts new file mode 100644 index 000000000..148df281d --- /dev/null +++ b/packages/core/src/evaluation/graders/format-tool-calls.ts @@ -0,0 +1,79 @@ +/** + * Formats tool calls from agent output messages into a human-readable summary. + * + * Used by `{{ tool_calls }}` template variable in LLM grader prompts. + * Extracts key input fields per tool to keep the summary compact: + * - Skill: `skill` arg + * - Read/Write/Edit: `file_path` + * - Bash: `command` + * - Grep/Glob: `pattern` + * - Other tools: first string-valued input field (if any) + * + * Returns empty string when there are no tool calls (template variable resolves to ''). 
+ */ + +import type { Message } from '../providers/types.js'; + +/** + * Key input fields to extract per tool name. + * Order matters — first matching field wins. + */ +const KEY_INPUT_FIELDS: ReadonlyMap<string, readonly string[]> = new Map([ + ['Skill', ['skill']], + ['Read', ['file_path']], + ['Write', ['file_path']], + ['Edit', ['file_path']], + ['Bash', ['command']], + ['Grep', ['pattern']], + ['Glob', ['pattern']], +]); + +/** Fallback: pick the first short string-valued field from input. */ +const MAX_FALLBACK_LENGTH = 120; + +export function formatToolCalls(output: readonly Message[] | undefined): string { + if (!output) return ''; + + const lines: string[] = []; + + for (const message of output) { + if (!message.toolCalls) continue; + for (const call of message.toolCalls) { + const toolName = call.tool ?? 'unknown'; + const detail = extractKeyDetail(toolName, call.input); + lines.push(detail ? `- ${toolName}: ${detail}` : `- ${toolName}`); + } + } + + return lines.length > 0 ? lines.join('\n') : ''; +} + +function extractKeyDetail(toolName: string, input: unknown): string { + if (!input || typeof input !== 'object') return ''; + const record = input as Record<string, unknown>; + + // Try known key fields for this tool + const knownFields = KEY_INPUT_FIELDS.get(toolName); + if (knownFields) { + for (const field of knownFields) { + const value = record[field]; + if (typeof value === 'string' && value.length > 0) { + return truncate(value); + } + } + } + + // Fallback: first short string-valued field + for (const value of Object.values(record)) { + if (typeof value === 'string' && value.length > 0 && value.length <= MAX_FALLBACK_LENGTH) { + return truncate(value); + } + } + + return ''; +} + +function truncate(value: string, maxLen = 120): string { + if (value.length <= maxLen) return value; + return `${value.slice(0, maxLen)}…`; +} diff --git a/packages/core/src/evaluation/graders/index.ts b/packages/core/src/evaluation/graders/index.ts index 107582aee..c87ff4a59 100.644 ---
a/packages/core/src/evaluation/graders/index.ts +++ b/packages/core/src/evaluation/graders/index.ts @@ -55,6 +55,8 @@ export { } from './llm-grader.js'; export type { LlmGraderOptions } from './llm-grader.js'; +export { formatToolCalls } from './format-tool-calls.js'; + export { SkillTriggerGrader } from './skill-trigger.js'; export { assembleLlmGraderPrompt } from './llm-grader-prompt.js'; diff --git a/packages/core/src/evaluation/graders/llm-grader-prompt.ts b/packages/core/src/evaluation/graders/llm-grader-prompt.ts index 1cc7774bb..fc50aae4a 100644 --- a/packages/core/src/evaluation/graders/llm-grader-prompt.ts +++ b/packages/core/src/evaluation/graders/llm-grader-prompt.ts @@ -24,6 +24,7 @@ export function assembleLlmGraderPrompt(input: { evaluatorConfig?: LlmGraderConfig; output?: readonly Message[]; fileChanges?: string; + toolCalls?: string; graderTemplateOverride?: string; }): LlmGraderPromptAssembly { const { @@ -32,6 +33,7 @@ export function assembleLlmGraderPrompt(input: { promptInputs, evaluatorConfig, fileChanges, + toolCalls, graderTemplateOverride, } = input; @@ -41,12 +43,19 @@ export function assembleLlmGraderPrompt(input: { if (rubrics && rubrics.length > 0) { const hasScoreRanges = rubrics.some((r) => r.score_ranges && r.score_ranges.length > 0); if (hasScoreRanges) { - return assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChanges); + return assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChanges, toolCalls); } - return assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChanges); + return assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChanges, toolCalls); } - return assembleFreeform(evalCase, candidate, promptInputs, fileChanges, graderTemplateOverride); + return assembleFreeform( + evalCase, + candidate, + promptInputs, + fileChanges, + toolCalls, + graderTemplateOverride, + ); } function assembleFreeform( @@ -54,6 +63,7 @@ function assembleFreeform( candidate: string, 
promptInputs: PromptInputs, fileChanges?: string, + toolCalls?: string, graderTemplateOverride?: string, ): LlmGraderPromptAssembly { const formattedQuestion = @@ -67,6 +77,7 @@ function assembleFreeform( [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (evalCase.reference_answer ?? '').trim(), [TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(), [TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? '', + [TEMPLATE_VARIABLES.TOOL_CALLS]: toolCalls ?? '', // Deprecated aliases [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(), [TEMPLATE_VARIABLES.OUTPUT_TEXT]: candidate.trim(), @@ -77,10 +88,13 @@ function assembleFreeform( const template = graderTemplateOverride ?? DEFAULT_GRADER_TEMPLATE; let userPrompt = substituteVariables(template, variables); - // Append file_changes section to default template only when present + // Append file_changes and tool_calls sections to default template only when present if (fileChanges && !graderTemplateOverride) { userPrompt += `\n\n[[ ## file_changes ## ]]\n${fileChanges}`; } + if (toolCalls && !graderTemplateOverride) { + userPrompt += `\n\n[[ ## tool_calls ## ]]\n${toolCalls}`; + } return { systemPrompt, @@ -96,6 +110,7 @@ function assembleChecklist( promptInputs: PromptInputs, rubrics: readonly RubricItem[], fileChanges?: string, + toolCalls?: string, ): LlmGraderPromptAssembly { const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 @@ -123,6 +138,10 @@ function assembleChecklist( parts.push('[[ ## file_changes ## ]]', fileChanges, ''); } + if (toolCalls) { + parts.push('[[ ## tool_calls ## ]]', toolCalls, ''); + } + parts.push('[[ ## rubrics ## ]]'); for (const rubric of rubrics) { @@ -150,6 +169,7 @@ function assembleScoreRange( promptInputs: PromptInputs, rubrics: readonly RubricItem[], fileChanges?: string, + toolCalls?: string, ): LlmGraderPromptAssembly { const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 @@ -178,6 +198,10 @@ function 
assembleScoreRange( parts.push('[[ ## file_changes ## ]]', fileChanges, ''); } + if (toolCalls) { + parts.push('[[ ## tool_calls ## ]]', toolCalls, ''); + } + parts.push('[[ ## scoring_criteria ## ]]'); for (const rubric of rubrics) { diff --git a/packages/core/src/evaluation/graders/llm-grader.ts b/packages/core/src/evaluation/graders/llm-grader.ts index 47812ef47..15e41ab99 100644 --- a/packages/core/src/evaluation/graders/llm-grader.ts +++ b/packages/core/src/evaluation/graders/llm-grader.ts @@ -272,6 +272,7 @@ export class LlmGrader implements Grader { [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context.evalCase.reference_answer ?? '').trim(), [TEMPLATE_VARIABLES.CRITERIA]: context.evalCase.criteria.trim(), [TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? '', + [TEMPLATE_VARIABLES.TOOL_CALLS]: context.toolCalls ?? '', // Deprecated aliases — same values as the primary variables above [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(), [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context.candidate.trim(), @@ -290,10 +291,13 @@ export class LlmGrader implements Grader { let userPrompt = substituteVariables(graderTemplate, variables); - // Append file_changes section to default template only when present + // Append file_changes and tool_calls sections to default template only when present if (context.fileChanges && !context.graderTemplateOverride && !this.graderTemplate) { userPrompt += `\n\n[[ ## file_changes ## ]]\n${context.fileChanges}`; } + if (context.toolCalls && !context.graderTemplateOverride && !this.graderTemplate) { + userPrompt += `\n\n[[ ## tool_calls ## ]]\n${context.toolCalls}`; + } const graderRawRequest: JsonObject = { userPrompt, @@ -691,6 +695,7 @@ export class LlmGrader implements Grader { [TEMPLATE_VARIABLES.OUTPUT]: context.candidate.trim(), [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context.evalCase.reference_answer ?? '').trim(), [TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? 
'', + [TEMPLATE_VARIABLES.TOOL_CALLS]: context.toolCalls ?? '', // Deprecated aliases [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(), [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context.candidate.trim(), @@ -726,6 +731,10 @@ export class LlmGrader implements Grader { parts.push('[[ ## file_changes ## ]]', context.fileChanges, ''); } + if (context.toolCalls) { + parts.push('[[ ## tool_calls ## ]]', context.toolCalls, ''); + } + if (rubrics && rubrics.length > 0) { parts.push('[[ ## rubrics ## ]]'); for (const rubric of rubrics) { @@ -766,6 +775,7 @@ export class LlmGrader implements Grader { [TEMPLATE_VARIABLES.OUTPUT]: context.candidate.trim(), [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context.evalCase.reference_answer ?? '').trim(), [TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? '', + [TEMPLATE_VARIABLES.TOOL_CALLS]: context.toolCalls ?? '', // Deprecated aliases [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(), [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context.candidate.trim(), @@ -801,6 +811,10 @@ export class LlmGrader implements Grader { parts.push('[[ ## file_changes ## ]]', context.fileChanges, ''); } + if (context.toolCalls) { + parts.push('[[ ## tool_calls ## ]]', context.toolCalls, ''); + } + if (rubrics && rubrics.length > 0) { parts.push('[[ ## rubrics ## ]]'); for (const rubric of rubrics) { @@ -923,6 +937,10 @@ export class LlmGrader implements Grader { parts.push('[[ ## file_changes ## ]]', context.fileChanges, ''); } + if (context.toolCalls) { + parts.push('[[ ## tool_calls ## ]]', context.toolCalls, ''); + } + parts.push('[[ ## scoring_criteria ## ]]'); for (const rubric of rubrics) { @@ -985,6 +1003,10 @@ export class LlmGrader implements Grader { parts.push('[[ ## file_changes ## ]]', context.fileChanges, ''); } + if (context.toolCalls) { + parts.push('[[ ## tool_calls ## ]]', context.toolCalls, ''); + } + parts.push('[[ ## rubrics ## ]]'); for (const rubric of rubrics) { diff --git a/packages/core/src/evaluation/graders/types.ts 
b/packages/core/src/evaluation/graders/types.ts index 14e4fd445..1d548e5f9 100644 --- a/packages/core/src/evaluation/graders/types.ts +++ b/packages/core/src/evaluation/graders/types.ts @@ -55,6 +55,8 @@ export interface EvaluationContext { readonly availableTargets?: readonly string[]; /** Unified diff of file changes from workspace */ readonly fileChanges?: string; + /** Formatted summary of tool calls from agent execution */ + readonly toolCalls?: string; /** Absolute path to the workspace directory */ readonly workspacePath?: string; /** Docker workspace config: when present, code-grader commands run inside a container */ diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts index 6a1a3a0c4..1e4b24e12 100644 --- a/packages/core/src/evaluation/orchestrator.ts +++ b/packages/core/src/evaluation/orchestrator.ts @@ -15,6 +15,7 @@ import { type EvaluationScore, type Grader, LlmGrader, + formatToolCalls, negateScore, scoreToVerdict, } from './graders.js'; @@ -2271,6 +2272,9 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise Provider | undefined; readonly availableTargets?: readonly string[]; readonly fileChanges?: string; + readonly toolCalls?: string; readonly workspacePath?: string; readonly dockerConfig?: import('./types.js').DockerWorkspaceConfig; readonly verbose?: boolean; @@ -2659,6 +2665,7 @@ async function evaluateCandidate(options: { targetResolver, availableTargets, fileChanges, + toolCalls, workspacePath, dockerConfig, threshold: evalThreshold, @@ -2688,6 +2695,7 @@ async function evaluateCandidate(options: { targetResolver, availableTargets, fileChanges, + toolCalls, workspacePath, dockerConfig, threshold: evalThreshold, @@ -2775,6 +2783,7 @@ async function runEvaluatorsForCase(options: { readonly targetResolver?: (name: string) => Provider | undefined; readonly availableTargets?: readonly string[]; readonly fileChanges?: string; + readonly toolCalls?: string; readonly 
workspacePath?: string; readonly dockerConfig?: import('./types.js').DockerWorkspaceConfig; readonly threshold?: number; @@ -2802,6 +2811,7 @@ async function runEvaluatorsForCase(options: { targetResolver, availableTargets, fileChanges, + toolCalls, workspacePath, dockerConfig, threshold, @@ -2832,6 +2842,7 @@ async function runEvaluatorsForCase(options: { targetResolver, availableTargets, fileChanges, + toolCalls, workspacePath, dockerConfig, threshold, @@ -2868,6 +2879,7 @@ async function runEvaluatorsForCase(options: { targetResolver, availableTargets, fileChanges, + toolCalls, workspacePath, dockerConfig, dependencyResults, @@ -2914,6 +2926,7 @@ async function runEvaluatorList(options: { readonly targetResolver?: (name: string) => Provider | undefined; readonly availableTargets?: readonly string[]; readonly fileChanges?: string; + readonly toolCalls?: string; readonly workspacePath?: string; readonly dockerConfig?: import('./types.js').DockerWorkspaceConfig; readonly threshold?: number; @@ -2942,6 +2955,7 @@ async function runEvaluatorList(options: { targetResolver, availableTargets, fileChanges, + toolCalls, workspacePath, dockerConfig, dependencyResults, @@ -2977,6 +2991,7 @@ async function runEvaluatorList(options: { targetResolver, availableTargets, fileChanges, + toolCalls, workspacePath, dockerConfig, dependencyResults, diff --git a/packages/core/src/evaluation/template-variables.ts b/packages/core/src/evaluation/template-variables.ts index 508d837db..9d92f0d87 100644 --- a/packages/core/src/evaluation/template-variables.ts +++ b/packages/core/src/evaluation/template-variables.ts @@ -8,6 +8,7 @@ * - {{ expected_output }} — reference answer as plain text * - {{ criteria }} — evaluation criteria string * - {{ file_changes }} — file diff (if available) + * - {{ tool_calls }} — formatted summary of tool calls from agent execution * * Deprecated aliases (emit a warning when used in custom templates): * - {{ input_text }} → use {{ input }} @@ -20,6 +21,7 @@ 
export const TEMPLATE_VARIABLES = { INPUT: 'input', OUTPUT: 'output', FILE_CHANGES: 'file_changes', + TOOL_CALLS: 'tool_calls', /** @deprecated Use INPUT instead — resolves to the same text value. */ INPUT_TEXT: 'input_text', /** @deprecated Use OUTPUT instead — resolves to the same text value. */ diff --git a/packages/core/test/evaluation/graders/format-tool-calls.test.ts b/packages/core/test/evaluation/graders/format-tool-calls.test.ts new file mode 100644 index 000000000..6ce1122c5 --- /dev/null +++ b/packages/core/test/evaluation/graders/format-tool-calls.test.ts @@ -0,0 +1,132 @@ +import { describe, expect, it } from 'vitest'; +import { formatToolCalls } from '../../../src/evaluation/graders/format-tool-calls.js'; +import type { Message } from '../../../src/evaluation/providers/types.js'; + +describe('formatToolCalls', () => { + it('returns empty string for undefined output', () => { + expect(formatToolCalls(undefined)).toBe(''); + }); + + it('returns empty string for empty messages array', () => { + expect(formatToolCalls([])).toBe(''); + }); + + it('returns empty string when no messages have tool calls', () => { + const messages: Message[] = [ + { role: 'assistant', content: 'Hello' }, + { role: 'user', content: 'Hi' }, + ]; + expect(formatToolCalls(messages)).toBe(''); + }); + + it('formats Skill tool calls with skill name', () => { + const messages: Message[] = [ + { + role: 'assistant', + toolCalls: [{ tool: 'Skill', input: { skill: 'commit' } }], + }, + ]; + expect(formatToolCalls(messages)).toBe('- Skill: commit'); + }); + + it('formats Read/Write/Edit tool calls with file_path', () => { + const messages: Message[] = [ + { + role: 'assistant', + toolCalls: [ + { tool: 'Read', input: { file_path: '/src/index.ts' } }, + { tool: 'Write', input: { file_path: '/src/output.ts', content: '...' 
} }, + { tool: 'Edit', input: { file_path: '/src/edit.ts', old_string: 'a', new_string: 'b' } }, + ], + }, + ]; + const result = formatToolCalls(messages); + expect(result).toBe('- Read: /src/index.ts\n- Write: /src/output.ts\n- Edit: /src/edit.ts'); + }); + + it('formats Bash tool calls with command', () => { + const messages: Message[] = [ + { + role: 'assistant', + toolCalls: [{ tool: 'Bash', input: { command: 'npm test' } }], + }, + ]; + expect(formatToolCalls(messages)).toBe('- Bash: npm test'); + }); + + it('formats Grep/Glob tool calls with pattern', () => { + const messages: Message[] = [ + { + role: 'assistant', + toolCalls: [ + { tool: 'Grep', input: { pattern: 'TODO', path: '/src' } }, + { tool: 'Glob', input: { pattern: '**/*.ts' } }, + ], + }, + ]; + expect(formatToolCalls(messages)).toBe('- Grep: TODO\n- Glob: **/*.ts'); + }); + + it('formats mixed tool calls across multiple messages', () => { + const messages: Message[] = [ + { + role: 'assistant', + toolCalls: [ + { tool: 'Read', input: { file_path: '/package.json' } }, + { tool: 'Bash', input: { command: 'ls -la' } }, + ], + }, + { role: 'user', content: 'ok' }, + { + role: 'assistant', + toolCalls: [{ tool: 'Skill', input: { skill: 'review-pr' } }], + }, + ]; + const result = formatToolCalls(messages); + expect(result).toBe('- Read: /package.json\n- Bash: ls -la\n- Skill: review-pr'); + }); + + it('falls back to first short string field for unknown tools', () => { + const messages: Message[] = [ + { + role: 'assistant', + toolCalls: [{ tool: 'CustomTool', input: { query: 'find me something' } }], + }, + ]; + expect(formatToolCalls(messages)).toBe('- CustomTool: find me something'); + }); + + it('shows tool name only when input is empty', () => { + const messages: Message[] = [ + { + role: 'assistant', + toolCalls: [{ tool: 'SomeTool', input: {} }], + }, + ]; + expect(formatToolCalls(messages)).toBe('- SomeTool'); + }); + + it('shows tool name only when input is undefined', () => { + const 
messages: Message[] = [ + { + role: 'assistant', + toolCalls: [{ tool: 'SomeTool' }], + }, + ]; + expect(formatToolCalls(messages)).toBe('- SomeTool'); + }); + + it('truncates long input values', () => { + const longCommand = 'x'.repeat(200); + const messages: Message[] = [ + { + role: 'assistant', + toolCalls: [{ tool: 'Bash', input: { command: longCommand } }], + }, + ]; + const result = formatToolCalls(messages); + expect(result).toContain('- Bash: '); + // 120 chars + ellipsis + expect(result.length).toBeLessThan(200); + }); +});