diff --git a/examples/eval/classifiers.rb b/examples/eval/classifiers.rb
new file mode 100644
index 00000000..2d93d320
--- /dev/null
+++ b/examples/eval/classifiers.rb
@@ -0,0 +1,144 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+require "bundler/setup"
+require "braintrust"
+require "opentelemetry/sdk"
+
+# Example: Classifiers
+#
+# Classifiers categorize and label eval outputs. Unlike scorers (which return
+# numeric 0-1 values), classifiers return structured Classification items —
+# each with an :id, an optional :label, and optional :metadata.
+#
+# Results are stored as a Hash keyed by classifier name:
+#
+#   { "sentiment" => [{ id: "positive", label: "Positive" }] }
+#
+# Three patterns are shown:
+#
+# 1. Block-based (Braintrust::Classifier.new):
+#    Returns a single Classification hash. Good for concise, one-off classifiers.
+#
+# 2. Multi-label block-based:
+#    Returns an Array of Classification hashes — useful when a single
+#    classifier assigns multiple labels to the same output.
+#
+# 3. Class-based (include Braintrust::Classifier):
+#    Define a class with a #call method. Good for reusable classifiers
+#    that carry their own logic and state.
+#
+# Classifiers and scorers run independently. You can use both together, or
+# use only classifiers when you don't need numeric scores.
+#
+# Usage:
+#   bundle exec ruby examples/eval/classifiers.rb
+
+Braintrust.init
+
+# ---------------------------------------------------------------------------
+# Test cases: customer support messages
+# ---------------------------------------------------------------------------
+MESSAGES = [
+  {input: "Hi! I just wanted to say thank you, the product is amazing!"},
+  {input: "I've been waiting 2 weeks for my order. This is unacceptable!"},
+  {input: "How do I reset my password? I can't find the option anywhere."},
+  {input: "The item arrived damaged. I need a refund immediately."},
+  {input: "Just checking in — any update on my ticket #4821?"}
+]
+
+# ---------------------------------------------------------------------------
+# Simulated task: generate a support response (replace with a real LLM call)
+# ---------------------------------------------------------------------------
+def generate_response(message)
+  case message
+  when /thank/i then "You're welcome! So glad you're enjoying it."
+  when /waiting|order/i then "I sincerely apologize for the delay. Let me look into this right away."
+  when /password|reset/i then "To reset your password, go to Settings > Account > Reset Password."
+  when /damaged|refund/i then "I'm sorry to hear that. I'll process your refund immediately."
+  else "Thanks for reaching out! Let me check on that for you."
+  end
+end
+
+# ---------------------------------------------------------------------------
+# Pattern 1: block-based single-label classifier
+#
+# Classifies each message into a single intent category.
+# Declare only the kwargs you need — extras are filtered automatically.
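+# For instance, the block below declares only `input:`; at runtime the runner
+# also passes expected:, output:, metadata:, trace:, and parameters:, and those
+# are dropped before the block is invoked.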
+# ---------------------------------------------------------------------------
+intent_classifier = Braintrust::Classifier.new("intent") do |input:|
+  id = case input
+  when /thank/i then "praise"
+  when /waiting|order|update/i then "follow_up"
+  when /password|reset|find/i then "how_to"
+  when /damaged|refund/i then "complaint"
+  else "other"
+  end
+
+  {name: "intent", id: id, label: id.tr("_", " ").capitalize}
+end
+
+# ---------------------------------------------------------------------------
+# Pattern 2: block-based multi-label classifier
+#
+# A single classifier can return an Array to assign multiple labels.
+# All items sharing the same :name are grouped into the same results array.
+# ---------------------------------------------------------------------------
+tone_classifier = Braintrust::Classifier.new("tone") do |input:|
+  labels = []
+  labels << {name: "tone", id: "urgent", label: "Urgent"} if input.match?(/immediately|unacceptable|waiting/i)
+  labels << {name: "tone", id: "polite", label: "Polite"} if input.match?(/please|thank|just checking/i)
+  labels << {name: "tone", id: "frustrated", label: "Frustrated"} if input.match?(/unacceptable|damaged|waiting/i)
+  labels << {name: "tone", id: "neutral", label: "Neutral"} if labels.empty?
+  labels
+end
+
+# ---------------------------------------------------------------------------
+# Pattern 3: class-based classifier
+#
+# Include Braintrust::Classifier and define #call with keyword args.
+# The class name is snake_cased to derive the default classifier name
+# (ResponseQualityClassifier -> "response_quality_classifier").
+# Override #name to customize it.
+# ---------------------------------------------------------------------------
+class ResponseQualityClassifier
+  include Braintrust::Classifier
+
+  def name
+    "response_quality"
+  end
+
+  def call(input:, output:)
+    word_count = output.to_s.split.length
+
+    id = if output.to_s.strip.empty?
+      "no_response"
+    elsif word_count < 5
+      "too_short"
+    elsif output.match?(/immediately|right away|look into/i)
+      "action_oriented"
+    else
+      "informational"
+    end
+
+    {
+      name: "response_quality",
+      id: id,
+      label: id.tr("_", " ").capitalize,
+      metadata: {word_count: word_count}
+    }
+  end
+end
+
+# ---------------------------------------------------------------------------
+# Run the eval — classifiers only (no numeric scores needed here)
+# ---------------------------------------------------------------------------
+Braintrust::Eval.run(
+  project: "ruby-sdk-examples",
+  experiment: "classifiers-example",
+  cases: MESSAGES,
+  task: ->(input:) { generate_response(input) },
+  classifiers: [intent_classifier, tone_classifier, ResponseQualityClassifier.new]
+)
+
+OpenTelemetry.tracer_provider.shutdown
diff --git a/lib/braintrust/classifier.rb b/lib/braintrust/classifier.rb
new file mode 100644
index 00000000..089de302
--- /dev/null
+++ b/lib/braintrust/classifier.rb
@@ -0,0 +1,157 @@
+# frozen_string_literal: true
+
+require_relative "internal/callable"
+
+module Braintrust
+  # Classifier wraps a classification function that categorizes and labels eval outputs.
+  #
+  # Unlike scorers (which return numeric 0-1 values), classifiers return structured
+  # {Classification} items with an :id and optional :label and :metadata.
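+  # For illustration, a complete item might look like this (values invented):
+  #   {name: "sentiment", id: "positive", label: "Positive", metadata: {rule: "regex"}}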
+  #
+  # Use inline with a block (keyword args):
+  #   classifier = Classifier.new("category") { |output:| {name: "category", id: "greeting", label: "Greeting"} }
+  #
+  # Or include in a class and define #call with keyword args:
+  #   class CategoryClassifier
+  #     include Braintrust::Classifier
+  #
+  #     def call(output:)
+  #       {name: "category", id: "greeting", label: "Greeting"}
+  #     end
+  #   end
+  #
+  # Classifiers may return a single Classification hash, an Array of them, or nil
+  # (meaning no classifications for this case).
+  module Classifier
+    DEFAULT_NAME = "classifier"
+
+    # @param base [Class] the class including Classifier
+    def self.included(base)
+      base.include(Callable)
+    end
+
+    # Create a block-based classifier.
+    #
+    # @param name [String, nil] optional name (defaults to "classifier")
+    # @param block [Proc] the classification implementation; declare only the keyword
+    #   args you need. Extra kwargs are filtered out automatically.
+    #
+    #   Supported kwargs: +input:+, +expected:+, +output:+, +metadata:+, +trace:+, +parameters:+
+    # @return [Classifier::Block]
+    # @raise [ArgumentError] if the block has unsupported arity
+    def self.new(name = nil, &block)
+      Block.new(name: name || DEFAULT_NAME, &block)
+    end
+
+    # Included into classes that +include Classifier+. Prepends KeywordFilter and
+    # ClassificationNormalizer so #call receives only declared kwargs and always returns
+    # Array<Hash>. Also provides a default #name and #call_parameters.
+    module Callable
+      # Normalizes the raw return value of #call into Array<Hash>.
+      # Nested inside Callable because it depends on #name which Callable provides.
+      module ClassificationNormalizer
+        # @return [Array<Hash>] normalized classification hashes with :name, :id, and optional :label, :metadata keys
+        def call(**kwargs)
+          normalize_classification_result(super)
+        end
+
+        private
+
+        # @param result [Hash, Array<Hash>, nil] raw return value from #call
+        # @return [Array<Hash>] zero or more classification hashes with :name, :id keys
+        # @raise [ArgumentError] if any item is not a non-empty object
+        def normalize_classification_result(result)
+          case result
+          when nil then []
+          when Array then result.map { |item| normalize_classification_item(item) }
+          when Hash then [normalize_classification_item(result)]
+          else
+            raise ArgumentError, "When returning structured classifier results, each classification must be a non-empty object. Got: #{result.inspect}"
+          end
+        end
+
+        # Fills in missing :name from the classifier; validates the item is a non-empty Hash.
+        # @param item [Hash] a classification hash
+        # @return [Hash] the item with :name defaulted and validated
+        # @raise [ArgumentError] if item is not a non-empty Hash
+        def normalize_classification_item(item)
+          unless item.is_a?(Hash) && !item.empty?
+            raise ArgumentError, "When returning structured classifier results, each classification must be a non-empty object. Got: #{item.inspect}"
+          end
+
+          # :name defaults to the classifier's resolved name when missing, empty, or non-string
+          unless item[:name].is_a?(String) && !item[:name].empty?
+            item = item.merge(name: name)
+          end
+
+          item
+        end
+      end
+
+      # Infrastructure modules prepended onto every classifier class.
+      # Used both to set up the ancestor chain and to skip past them in
+      # #call_parameters so KeywordFilter sees the real call signature.
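+      # With both prepended (ClassificationNormalizer in front), a call runs
+      # ClassificationNormalizer#call -> KeywordFilter#call -> the user's #call:
+      # kwargs are filtered on the way in, and the return value is normalized
+      # into Array<Hash> on the way out.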
+ PREPENDED = [Internal::Callable::KeywordFilter, ClassificationNormalizer].freeze + + # @param base [Class] the class including Callable + def self.included(base) + PREPENDED.each { |mod| base.prepend(mod) } + end + + # Default name derived from the class name (e.g. CategoryClassifier -> "category_classifier"). + # @return [String] + def name + klass = self.class.name&.split("::")&.last + return Classifier::DEFAULT_NAME unless klass + klass.gsub(/([a-z])([A-Z])/, '\1_\2').downcase + end + + # Provides KeywordFilter with the actual call signature of the subclass. + # Walks past PREPENDED modules in the ancestor chain so that user-defined + # #call keyword params are correctly introspected. + # Block overrides this to point directly at @block.parameters. + # @return [Array] parameter list + def call_parameters + meth = method(:call) + meth = meth.super_method while meth.super_method && PREPENDED.include?(meth.owner) + meth.parameters + end + end + + # Block-based classifier. Stores a Proc and delegates #call to it. + # Includes Classifier so it satisfies +Classifier ===+ checks. + # Exposes #call_parameters so KeywordFilter can introspect the block's + # declared kwargs rather than Block#call's **kwargs signature. + class Block + include Classifier + + # @return [String] + attr_reader :name + + # @param name [String] classifier name + # @param block [Proc] classification implementation; must use keyword args or zero-arity + # @raise [ArgumentError] if the block uses positional params + def initialize(name: DEFAULT_NAME, &block) + @name = name + params = block.parameters + unless Internal::Callable::KeywordFilter.has_any_keywords?(params) || block.arity == 0 + raise ArgumentError, "Classifier block must use keyword args (got arity #{block.arity})" + end + @block = block + end + + # @param kwargs [Hash] keyword arguments (filtered by KeywordFilter) + # @return [Array] normalized classification results + def call(**kwargs) + @block.call(**kwargs) + end + + # Exposes the block's parameter list so KeywordFilter can filter + # kwargs to match the block's declared keywords. + # @return [Array] parameter list from Proc#parameters + def call_parameters + @block.parameters + end + end + end +end diff --git a/lib/braintrust/eval.rb b/lib/braintrust/eval.rb index d661eedc..ac27b48b 100644 --- a/lib/braintrust/eval.rb +++ b/lib/braintrust/eval.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require_relative "classifier" require_relative "scorer" require_relative "task" require_relative "functions" @@ -160,7 +161,10 @@ def scorer(name, callable = nil, &block) # - String: dataset name (fetches from same project) # - Hash: {name:, id:, project:, version:, limit:} # @param task [#call] The task to evaluate (must be callable) - # @param scorers [Array] The scorers to use (String names, Scorer objects, or callables) + # @param scorers [Array, nil] The scorers to use (String names, Scorer objects, or callables). + # At least one of scorers or classifiers must be provided. + # @param classifiers [Array, nil] The classifiers to use. + # At least one of scorers or classifiers must be provided. # @param on_progress [#call, nil] Optional callback fired after each test case. # Receives a Hash: {"data" => output, "scores" => {name => value}} on success, # or {"error" => message} on failure. 
@@ -177,13 +181,16 @@ def scorer(name, callable = nil, &block) # @param parent [Hash, nil] Parent span context ({object_type:, object_id:, generation:}) # @param parameters [Hash, nil] Runtime parameters passed to task and scorers as a `parameters:` keyword argument # @return [Result] - def run(task:, scorers:, project: nil, experiment: nil, - cases: nil, dataset: nil, on_progress: nil, + def run(task:, scorers: nil, classifiers: nil, project: nil, + experiment: nil, cases: nil, dataset: nil, on_progress: nil, parallelism: 1, tags: nil, metadata: nil, update: false, quiet: false, state: nil, tracer_provider: nil, project_id: nil, parent: nil, parameters: nil) # Validate required parameters - validate_params!(task: task, scorers: scorers, cases: cases, dataset: dataset) + validate_params!(task: task, scorers: scorers, + classifiers: classifiers, cases: cases, dataset: dataset) + scorers ||= [] + classifiers ||= [] experiment_id = nil project_name = project @@ -216,6 +223,7 @@ def run(task:, scorers:, project: nil, experiment: nil, context = Context.build( task: task, scorers: scorers, + classifiers: classifiers, cases: cases, experiment_id: experiment_id, experiment_name: experiment, @@ -245,9 +253,19 @@ def print_result(result) # Validate required parameters # @raise [ArgumentError] if validation fails - def validate_params!(task:, scorers:, cases:, dataset:) + def validate_params!(task:, scorers:, classifiers:, cases:, dataset:) raise ArgumentError, "task is required" unless task - raise ArgumentError, "scorers is required" unless scorers + + # Validate task is callable before anything else + unless task.respond_to?(:call) + raise ArgumentError, "task must be callable (respond to :call)" + end + + has_scorers = scorers && !scorers.empty? + has_classifiers = classifiers && !classifiers.empty? + unless has_scorers || has_classifiers + raise ArgumentError, "at least one of scorers or classifiers is required" + end # Validate cases and dataset are mutually exclusive if cases && dataset @@ -258,11 +276,6 @@ def validate_params!(task:, scorers:, cases:, dataset:) unless cases || dataset raise ArgumentError, "must specify either 'cases' or 'dataset'" end - - # Validate task is callable - unless task.respond_to?(:call) - raise ArgumentError, "task must be callable (respond to :call)" - end end # Resolve project by name or ID. Creates if needed. diff --git a/lib/braintrust/eval/context.rb b/lib/braintrust/eval/context.rb index ebcca050..02717267 100644 --- a/lib/braintrust/eval/context.rb +++ b/lib/braintrust/eval/context.rb @@ -1,18 +1,20 @@ # frozen_string_literal: true require_relative "cases" +require_relative "../classifier" module Braintrust module Eval # Holds all normalized, ready-to-execute eval components. # Use Context.build to construct from raw user inputs. 
class Context - attr_reader :task, :scorers, :cases, :experiment_id, :experiment_name, - :project_id, :project_name, :state, :tracer_provider, + attr_reader :task, :scorers, :classifiers, :cases, :experiment_id, + :experiment_name, :project_id, :project_name, :state, :tracer_provider, :on_progress, :parent_span_attr, :generation, :parameters # @param task [Task] Normalized task wrapper # @param scorers [Array] Normalized scorer wrappers + # @param classifiers [Array] Normalized classifier wrappers # @param cases [Cases] Normalized eval cases # @param experiment_id [String, nil] Experiment ID for logging and trace linkage # @param experiment_name [String, nil] Experiment name, included in span attributes @@ -24,11 +26,13 @@ class Context # @param parent_span_attr [String, nil] Formatted parent span identifier ("type:id"), linking spans to a parent context # @param generation [Integer, nil] Generation number from the parent span context, used to link spans in a trace hierarchy # @param parameters [Hash, nil] Runtime parameters passed to task and scorers as a `parameters:` keyword argument - def initialize(task:, scorers:, cases:, experiment_id: nil, experiment_name: nil, - project_id: nil, project_name: nil, state: nil, tracer_provider: nil, - on_progress: nil, parent_span_attr: nil, generation: nil, parameters: nil) + def initialize(task:, scorers:, cases:, classifiers: [], + experiment_id: nil, experiment_name: nil, project_id: nil, + project_name: nil, state: nil, tracer_provider: nil, on_progress: nil, + parent_span_attr: nil, generation: nil, parameters: nil) @task = task @scorers = scorers + @classifiers = classifiers @cases = cases @experiment_id = experiment_id @experiment_name = experiment_name @@ -46,6 +50,7 @@ def initialize(task:, scorers:, cases:, experiment_id: nil, experiment_name: nil # Delegates to Factory for normalization. 
# @param task [Task, Proc, #call] Task to evaluate; wrapped into a {Task} if needed # @param scorers [Array] Scorers; each is normalized into a {Scorer} + # @param classifiers [Array] Classifiers; each is normalized into a {Classifier} # @param cases [Cases, Array, Enumerable] Eval cases; wrapped into {Cases} if needed # @param experiment_id [String, nil] Experiment ID for logging # @param experiment_name [String, nil] Experiment name, included in span attributes @@ -57,14 +62,15 @@ def initialize(task:, scorers:, cases:, experiment_id: nil, experiment_name: nil # @param parent [Hash, nil] Parent span info with keys :object_type, :object_id, and optionally :generation # @param parameters [Hash, nil] Runtime parameters passed to task and scorers as a `parameters:` keyword argument # @return [Context] - def self.build(task:, scorers:, cases:, experiment_id: nil, experiment_name: nil, - project_id: nil, project_name: nil, state: nil, tracer_provider: nil, - on_progress: nil, parent: nil, parameters: nil) + def self.build(task:, scorers:, cases:, classifiers: [], + experiment_id: nil, experiment_name: nil, project_id: nil, + project_name: nil, state: nil, tracer_provider: nil, on_progress: nil, + parent: nil, parameters: nil) Factory.new( state: state, tracer_provider: tracer_provider, project_id: project_id, project_name: project_name ).build( - task: task, scorers: scorers, cases: cases, + task: task, scorers: scorers, classifiers: classifiers, cases: cases, experiment_id: experiment_id, experiment_name: experiment_name, on_progress: on_progress, parent: parent, parameters: parameters ) @@ -86,17 +92,19 @@ def initialize(state: nil, tracer_provider: nil, project_id: nil, project_name: # Normalize raw inputs and construct a {Context}. # @param task [Task, Proc, #call] Raw task # @param scorers [Array] Raw scorers + # @param classifiers [Array] Raw classifiers # @param cases [Cases, Array, Enumerable] Raw eval cases # @param experiment_id [String, nil] # @param experiment_name [String, nil] # @param on_progress [Proc, nil] # @param parent [Hash, nil] Parent span info with keys :object_type, :object_id, and optionally :generation # @return [Context] - def build(task:, scorers:, cases:, experiment_id: nil, experiment_name: nil, - on_progress: nil, parent: nil, parameters: nil) + def build(task:, scorers:, cases:, classifiers: [], experiment_id: nil, + experiment_name: nil, on_progress: nil, parent: nil, parameters: nil) Context.new( task: normalize_task(task), scorers: normalize_scorers(scorers), + classifiers: normalize_classifiers(classifiers), cases: normalize_cases(cases), experiment_id: experiment_id, experiment_name: experiment_name, @@ -188,6 +196,23 @@ def normalize_scorers(raw) end end end + + # @param raw [Array] + # @return [Array] + def normalize_classifiers(raw) + raw.map do |classifier| + case classifier + when Braintrust::Classifier + classifier + when Proc + # Pass Proc/Lambda directly to preserve keyword arg info + Braintrust::Classifier.new(&classifier) + else + name = classifier.respond_to?(:name) ? 
classifier.name : nil + Braintrust::Classifier.new(name, &classifier.method(:call)) + end + end + end end end end diff --git a/lib/braintrust/eval/evaluator.rb b/lib/braintrust/eval/evaluator.rb index a5f135bc..6798f31a 100644 --- a/lib/braintrust/eval/evaluator.rb +++ b/lib/braintrust/eval/evaluator.rb @@ -40,11 +40,12 @@ module Eval # } # ) class Evaluator - attr_accessor :task, :scorers, :parameters + attr_accessor :task, :scorers, :classifiers, :parameters - def initialize(task: nil, scorers: [], parameters: {}) + def initialize(task: nil, scorers: [], classifiers: [], parameters: {}) @task = task @scorers = scorers + @classifiers = classifiers @parameters = parameters end @@ -68,6 +69,7 @@ def validate! # @param project_id [String, nil] Project UUID (skips project creation) # @param dataset [String, Hash, Dataset, Dataset::ID, nil] Dataset to fetch # @param scorers [Array, nil] Additional scorers (merged with evaluator's own) + # @param classifiers [Array, nil] Additional classifiers (merged with evaluator's own) # @param parent [Hash, nil] Parent span context # @param state [State, nil] Braintrust state # @param update [Boolean] If true, allow reusing existing experiment (default: false) @@ -75,16 +77,19 @@ def validate! # @return [Result] def run(cases, on_progress: nil, quiet: false, project: nil, experiment: nil, project_id: nil, - dataset: nil, scorers: nil, parent: nil, + dataset: nil, scorers: nil, classifiers: nil, parent: nil, state: nil, update: false, tracer_provider: nil, parameters: nil) all_scorers = scorers ? self.scorers + scorers : self.scorers + all_classifiers = classifiers ? + self.classifiers + classifiers : + self.classifiers Braintrust::Eval.run( - task: task, scorers: all_scorers, cases: cases, dataset: dataset, - project: project, experiment: experiment, project_id: project_id, - parent: parent, on_progress: on_progress, quiet: quiet, - state: state, update: update, tracer_provider: tracer_provider, - parameters: parameters + task: task, scorers: all_scorers, classifiers: all_classifiers, + cases: cases, dataset: dataset, project: project, + experiment: experiment, project_id: project_id, parent: parent, + on_progress: on_progress, quiet: quiet, state: state, update: update, + tracer_provider: tracer_provider, parameters: parameters ) end end diff --git a/lib/braintrust/eval/result.rb b/lib/braintrust/eval/result.rb index c18af302..7af7132f 100644 --- a/lib/braintrust/eval/result.rb +++ b/lib/braintrust/eval/result.rb @@ -9,7 +9,7 @@ module Eval # Contains experiment metadata, errors, timing information, and raw score data class Result attr_reader :experiment_id, :experiment_name, :project_id, :project_name, - :permalink, :errors, :duration, :scores + :permalink, :errors, :duration, :scores, :classifications # Create a new result # @param experiment_id [String] The experiment ID @@ -20,8 +20,9 @@ class Result # @param errors [Array] List of errors that occurred # @param duration [Float] Duration in seconds # @param scores [Hash, nil] Raw score data { scorer_name => Array } + # @param classifications [Hash, nil] Classification results { name => Array }, nil when no classifiers ran def initialize(experiment_id:, experiment_name:, project_id:, project_name:, - permalink:, errors:, duration:, scores: nil) + permalink:, errors:, duration:, scores: nil, classifications: nil) @experiment_id = experiment_id @experiment_name = experiment_name @project_id = project_id @@ -30,6 +31,7 @@ def initialize(experiment_id:, experiment_name:, project_id:, project_name:, @errors 
= errors @duration = duration @scores = scores + @classifications = classifications end # Check if the evaluation was successful (no errors) diff --git a/lib/braintrust/eval/runner.rb b/lib/braintrust/eval/runner.rb index f461e041..074415ff 100644 --- a/lib/braintrust/eval/runner.rb +++ b/lib/braintrust/eval/runner.rb @@ -27,8 +27,9 @@ def initialize(eval_context) @eval_context = eval_context @tracer = eval_context.tracer_provider.tracer("braintrust-eval") - # Mutex for thread-safe score collection + # Mutexes for thread-safe result collection @score_mutex = Mutex.new + @classification_mutex = Mutex.new end # Run evaluation and return Result @@ -39,6 +40,7 @@ def run(parallelism: 1) eval_cases = eval_context.cases errors = Queue.new @scores = {} # Reset for each run: { scorer_name => Array } + @classifications = {} # Reset for each run: { classifier_name => Array } if parallelism && parallelism > 1 Internal::ThreadPool.each(eval_cases, parallelism: parallelism) do |eval_case| @@ -69,7 +71,8 @@ def run(parallelism: 1) permalink: permalink, errors: error_array, duration: duration, - scores: @scores + scores: @scores, + classifications: @classifications.empty? ? nil : @classifications ) end @@ -119,6 +122,17 @@ def run_eval_case(kase, errors) errors << "Scorers failed for input '#{kase.input}': #{e.message}" end + # Run classifiers (independent of scorers; errors do not abort eval) + classifier_errors = run_classifiers(kase, eval_span) + unless classifier_errors.empty? + existing_metadata = kase.metadata || {} + classifier_errors_metadata = existing_metadata.merge(classifier_errors: classifier_errors) + set_json_attr(eval_span, "braintrust.metadata", classifier_errors_metadata) + classifier_errors.each do |classifier_name, message| + errors << "Classifier '#{classifier_name}' failed for input '#{kase.input}': #{message}" + end + end + # Set output after task completes set_json_attr(eval_span, "braintrust.output_json", {output: kase.output}) @@ -318,6 +332,104 @@ def collect_scores(score_results) score_results.each { |s| (@scores[s[:name]] ||= []) << s[:score] } end end + + # Run all classifiers for a case. Classifier errors are non-fatal and stored in metadata. + # @param kase [CaseContext] The per-case context (output must be populated) + # @param eval_span [OpenTelemetry::Trace::Span] The eval span for this case + # @return [Hash] classifier_errors map (name -> error message), empty if no errors + def run_classifiers(kase, eval_span) + return {} if eval_context.classifiers.empty? 
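+        # classifier_kwargs is the full invocation payload (includes trace:);
+        # classifier_input is the subset logged as each classifier span's input.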
+
+        classifier_kwargs = {
+          input: kase.input,
+          expected: kase.expected,
+          output: kase.output,
+          metadata: kase.metadata || {},
+          trace: kase.trace,
+          parameters: eval_context.parameters || {}
+        }
+        classifier_input = {
+          input: kase.input,
+          expected: kase.expected,
+          output: kase.output,
+          metadata: kase.metadata || {},
+          parameters: eval_context.parameters || {}
+        }
+
+        case_classifications = {}
+        classifier_errors = {}
+
+        eval_context.classifiers.each_with_index do |classifier, index|
+          classifier_name = classifier.name || "classifier_#{index}"
+          begin
+            results = run_classifier(classifier, classifier_kwargs, classifier_input)
+            results.each do |item|
+              item_name = item[:name]
+              classification_item = item.except(:name)
+              (case_classifications[item_name] ||= []) << classification_item
+            end
+            collect_classifications(results)
+          rescue => e
+            Braintrust::Log.warn("[Classifier] #{classifier_name} failed: #{e.message}")
+            classifier_errors[classifier_name] = e.message
+          end
+        end
+
+        unless case_classifications.empty?
+          set_json_attr(eval_span, "braintrust.classifications", case_classifications)
+        end
+
+        classifier_errors
+      end
+
+      # Run a single classifier inside its own span.
+      # @param classifier [Classifier] The classifier to run
+      # @param classifier_kwargs [Hash] Keyword arguments for the classifier
+      # @param classifier_input [Hash] Input to log on the span
+      # @return [Array] Normalized classification results from the classifier
+      def run_classifier(classifier, classifier_kwargs, classifier_input)
+        tracer.in_span(classifier.name) do |classifier_span|
+          classifier_span.set_attribute("braintrust.parent", eval_context.parent_span_attr) if eval_context.parent_span_attr
+          set_json_attr(classifier_span, "braintrust.span_attributes", build_classifier_span_attributes(classifier.name))
+          set_json_attr(classifier_span, "braintrust.input_json", classifier_input)
+
+          classification_results = classifier.call(**classifier_kwargs)
+
+          # Build an output Hash keyed by name => array of items (for span logging)
+          output_by_name = {}
+          classification_results.each do |item|
+            (output_by_name[item[:name]] ||= []) << item.except(:name)
+          end
+
+          set_json_attr(classifier_span, "braintrust.output_json", output_by_name)
+
+          classification_results
+        rescue => e
+          record_span_error(classifier_span, e, "ClassifierError")
+          raise
+        end
+      end
+
+      # Build span_attributes for a classifier span.
+      # @param classifier_name [String] The classifier name
+      # @return [Hash]
+      def build_classifier_span_attributes(classifier_name)
+        attrs = {type: "classifier", name: classifier_name, purpose: "scorer"}
+        attrs[:generation] = eval_context.generation if eval_context.generation
+        attrs
+      end
+
+      # Collect classification results into the global accumulator (thread-safe).
+      # Converts Classification to ClassificationItem by dropping :name.
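+      # e.g. {name: "tone", id: "urgent"} is accumulated as
+      # @classifications["tone"] << {id: "urgent"}.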
+ # @param classification_results [Array] Classification results from a classifier + def collect_classifications(classification_results) + @classification_mutex.synchronize do + classification_results.each do |item| + item_name = item[:name] + (@classifications[item_name] ||= []) << item.except(:name) + end + end + end end end end diff --git a/lib/braintrust/server/services/list_service.rb b/lib/braintrust/server/services/list_service.rb index 06bd7add..8c29c6d3 100644 --- a/lib/braintrust/server/services/list_service.rb +++ b/lib/braintrust/server/services/list_service.rb @@ -20,6 +20,11 @@ def call {"name" => scorer_name} end entry = {"scores" => scores} + classifiers = (evaluator.classifiers || []).each_with_index.map do |classifier, i| + classifier_name = classifier.respond_to?(:name) ? classifier.name : "classifier_#{i}" + {"name" => classifier_name} + end + entry["classifiers"] = classifiers unless classifiers.empty? params = serialize_parameters(evaluator.parameters) entry["parameters"] = params if params result[name] = entry diff --git a/test/braintrust/classifier_test.rb b/test/braintrust/classifier_test.rb new file mode 100644 index 00000000..8dfe4501 --- /dev/null +++ b/test/braintrust/classifier_test.rb @@ -0,0 +1,226 @@ +# frozen_string_literal: true + +require "test_helper" +require "braintrust/classifier" + +class Braintrust::ClassifierTest < Minitest::Test + # ============================================ + # Classifier.new with block (inline classifiers) + # ============================================ + + def test_classifier_with_kwargs_block + classifier = Braintrust::Classifier.new("category") do |output:, **| + {name: "category", id: "greeting", label: "Greeting"} + end + + assert_equal "category", classifier.name + result = classifier.call(input: "hello", expected: nil, output: "hello") + assert_equal [{name: "category", id: "greeting", label: "Greeting"}], result + end + + def test_classifier_with_subset_kwargs_filters_extra_keys + classifier = Braintrust::Classifier.new("category") do |output:| + {name: "category", id: "word"} + end + + result = classifier.call(input: "x", expected: nil, output: "hello", metadata: {}, tags: ["t"]) + assert_equal [{name: "category", id: "word"}], result + end + + def test_classifier_returns_nil_produces_empty_array + classifier = Braintrust::Classifier.new("maybe") { |**| nil } + assert_equal [], classifier.call(output: "hello") + end + + def test_classifier_returns_array_of_classifications + classifier = Braintrust::Classifier.new("sentiment") do |**| + [ + {name: "sentiment", id: "positive", label: "Positive"}, + {name: "sentiment", id: "enthusiastic", label: "Enthusiastic"} + ] + end + + result = classifier.call(output: "great!") + assert_equal 2, result.length + assert_equal({name: "sentiment", id: "positive", label: "Positive"}, result[0]) + assert_equal({name: "sentiment", id: "enthusiastic", label: "Enthusiastic"}, result[1]) + end + + def test_classifier_with_metadata + classifier = Braintrust::Classifier.new("category") do |**| + {name: "category", id: "greeting", label: "Greeting", metadata: {source: "unit-test"}} + end + + result = classifier.call(output: "hello") + assert_equal [{name: "category", id: "greeting", label: "Greeting", metadata: {source: "unit-test"}}], result + end + + # ============================================ + # Name defaulting + # ============================================ + + def test_name_defaults_to_classifier_function_name_when_missing + classifier = Braintrust::Classifier.new("my_classifier") { 
|**| + {id: "foo"} # no :name key + } + + result = classifier.call(output: "x") + assert_equal "my_classifier", result[0][:name] + end + + def test_name_defaults_to_classifier_function_name_when_empty_string + classifier = Braintrust::Classifier.new("my_classifier") { |**| + {name: "", id: "foo"} + } + + result = classifier.call(output: "x") + assert_equal "my_classifier", result[0][:name] + end + + def test_name_defaults_to_classifier_function_name_when_not_a_string + classifier = Braintrust::Classifier.new("my_classifier") { |**| + {name: 42, id: "foo"} + } + + result = classifier.call(output: "x") + assert_equal "my_classifier", result[0][:name] + end + + def test_explicit_name_in_result_takes_precedence + classifier = Braintrust::Classifier.new("my_classifier") { |**| + {name: "override_name", id: "foo"} + } + + result = classifier.call(output: "x") + assert_equal "override_name", result[0][:name] + end + + # ============================================ + # Validation + # ============================================ + + def test_classifier_non_empty_object_validation_nil_item + classifier = Braintrust::Classifier.new("bad") { |**| [nil] } + + error = assert_raises(ArgumentError) do + classifier.call(output: "x") + end + assert_match(/each classification must be a non-empty object/, error.message) + assert_match(/nil/, error.message) + end + + def test_classifier_non_empty_object_validation_empty_hash + classifier = Braintrust::Classifier.new("bad") { |**| {} } + + error = assert_raises(ArgumentError) do + classifier.call(output: "x") + end + assert_match(/each classification must be a non-empty object/, error.message) + end + + def test_classifier_non_empty_object_validation_string_item + classifier = Braintrust::Classifier.new("bad") { |**| ["not-a-hash"] } + + error = assert_raises(ArgumentError) do + classifier.call(output: "x") + end + assert_match(/each classification must be a non-empty object/, error.message) + end + + def test_classifier_non_empty_object_validation_non_hash_scalar + classifier = Braintrust::Classifier.new("bad") { |**| 42 } + + error = assert_raises(ArgumentError) do + classifier.call(output: "x") + end + assert_match(/each classification must be a non-empty object/, error.message) + end + + def test_classifier_positional_params_raises + error = assert_raises(ArgumentError) do + Braintrust::Classifier.new("bad") { |a, b| a } + end + + assert_match(/classifier block must use keyword args/i, error.message) + end + + # ============================================ + # Name detection + # ============================================ + + def test_classifier_name_defaults_to_classifier_for_base_class + classifier = Braintrust::Classifier.new { |**| {id: "x"} } + assert_equal "classifier", classifier.name + end + + def test_classifier_explicit_name_takes_precedence + classifier = Braintrust::Classifier.new("my_name") { |**| {id: "x"} } + assert_equal "my_name", classifier.name + end + + # ============================================ + # Subclass pattern + # ============================================ + + def test_subclass_with_call_override + klass = Class.new do + include Braintrust::Classifier + + def call(output:) + {name: "category", id: output.empty? ? 
"empty" : "nonempty"} + end + end + + classifier = klass.new + assert_kind_of Braintrust::Classifier, classifier + + result = classifier.call(input: "x", expected: nil, output: "hello") + assert_equal [{name: "category", id: "nonempty"}], result + + result2 = classifier.call(input: "x", expected: nil, output: "") + assert_equal [{name: "category", id: "empty"}], result2 + end + + def test_subclass_with_name_override + klass = Class.new do + include Braintrust::Classifier + + def name + "custom_classifier" + end + + def call(**) + {id: "foo"} + end + end + + classifier = klass.new + assert_equal "custom_classifier", classifier.name + end + + def test_subclass_name_derived_from_class_name + klass = Class.new do + include Braintrust::Classifier + + def call(**) + {id: "foo"} + end + end + + Braintrust.stub_const(:FuzzyMatchTestClassifier, klass) do + classifier = klass.new + assert_equal "fuzzy_match_test_classifier", classifier.name + end + end + + def test_subclass_without_call_raises_on_call + klass = Class.new do + include Braintrust::Classifier + end + classifier = klass.new + + assert_raises(NoMethodError) do + classifier.call(output: "x") + end + end +end diff --git a/test/braintrust/contrib/rails/server/eval_controller_test.rb b/test/braintrust/contrib/rails/server/eval_controller_test.rb index 8eaaa54e..93bd8ad0 100644 --- a/test/braintrust/contrib/rails/server/eval_controller_test.rb +++ b/test/braintrust/contrib/rails/server/eval_controller_test.rb @@ -24,7 +24,7 @@ def app end def test_streams_sse_events_for_inline_data - @evaluators["upcase-eval"] = test_evaluator(task: ->(input) { input.to_s.upcase }) + @evaluators["upcase-eval"] = test_evaluator(task: ->(input) { input.to_s.upcase }, scorers: [noop_scorer]) reset_engine!(evaluators: @evaluators, auth: :none) post_json "/eval", { @@ -53,7 +53,7 @@ def test_streams_sse_events_for_inline_data end def test_progress_events_contain_output - @evaluators["upcase-eval"] = test_evaluator(task: ->(input) { input.to_s.upcase }) + @evaluators["upcase-eval"] = test_evaluator(task: ->(input) { input.to_s.upcase }, scorers: [noop_scorer]) reset_engine!(evaluators: @evaluators, auth: :none) post_json "/eval", { @@ -94,7 +94,7 @@ def test_summary_event_contains_scores end def test_error_still_emits_progress_and_done - @evaluators["failing-eval"] = test_evaluator(task: ->(_input) { raise "task exploded" }) + @evaluators["failing-eval"] = test_evaluator(task: ->(_input) { raise "task exploded" }, scorers: [noop_scorer]) reset_engine!(evaluators: @evaluators, auth: :none) post_json "/eval", { @@ -162,6 +162,10 @@ def test_evaluator(**kwargs) Test::Support::EvalHelper::TestEvaluator.new(tracer_provider: @rig.tracer_provider, **kwargs) end + def noop_scorer + Braintrust::Scorer.new("noop") { 1.0 } + end + def post_json(path, body) post path, JSON.generate(body), {"CONTENT_TYPE" => "application/json"} end diff --git a/test/braintrust/eval/evaluator_test.rb b/test/braintrust/eval/evaluator_test.rb index e6268363..a590039b 100644 --- a/test/braintrust/eval/evaluator_test.rb +++ b/test/braintrust/eval/evaluator_test.rb @@ -77,7 +77,10 @@ def test_run_delegates_to_eval_run end def test_run_passes_on_progress - evaluator = Braintrust::Eval::Evaluator.new(task: ->(input:) { input }) + evaluator = Braintrust::Eval::Evaluator.new( + task: ->(input:) { input }, + scorers: [Braintrust::Scorer.new("noop") { 1.0 }] + ) progress_events = [] cases = [{input: "a"}, {input: "b"}] diff --git a/test/braintrust/eval/runner_test.rb b/test/braintrust/eval/runner_test.rb 
index bd4b525a..7b021236 100644 --- a/test/braintrust/eval/runner_test.rb +++ b/test/braintrust/eval/runner_test.rb @@ -2065,3 +2065,276 @@ def test_runner_parameters_with_parallelism assert(params.all? { |p| p == {"model" => "gpt-4"} }) end end + +class Braintrust::Eval::RunnerClassifierTest < Minitest::Test + def test_runner_with_classifiers_only + rig = setup_otel_test_rig + + context = Braintrust::Eval::Context.build( + task: ->(input:) { input }, + scorers: [], + classifiers: [ + Braintrust::Classifier.new("category") { |output:| {name: "category", id: "greeting", label: "Greeting"} } + ], + cases: [{input: "hello", expected: nil}], + experiment_id: "exp-123", + state: rig.state, + tracer_provider: rig.tracer_provider + ) + + result = Braintrust::Eval::Runner.new(context).run + assert result.success? + assert_equal({}, result.scores) + assert_equal({"category" => [{id: "greeting", label: "Greeting"}]}, result.classifications) + end + + def test_runner_with_scorers_and_classifiers + rig = setup_otel_test_rig + + context = Braintrust::Eval::Context.build( + task: ->(input:) { input.upcase }, + scorers: [Braintrust::Scorer.new("exact") { |expected:, output:| (output == expected) ? 1.0 : 0.0 }], + classifiers: [ + Braintrust::Classifier.new("category") { |**| {name: "category", id: "text"} } + ], + cases: [{input: "hello", expected: "HELLO"}], + experiment_id: "exp-123", + state: rig.state, + tracer_provider: rig.tracer_provider + ) + + result = Braintrust::Eval::Runner.new(context).run + assert result.success? + assert_equal [1.0], result.scores["exact"] + assert_equal({"category" => [{id: "text"}]}, result.classifications) + end + + def test_runner_classifier_nil_return_produces_no_classifications + rig = setup_otel_test_rig + + context = Braintrust::Eval::Context.build( + task: ->(input:) { input }, + scorers: [], + classifiers: [ + Braintrust::Classifier.new("maybe") { |**| nil } + ], + cases: [{input: "hello"}], + experiment_id: "exp-123", + state: rig.state, + tracer_provider: rig.tracer_provider + ) + + result = Braintrust::Eval::Runner.new(context).run + assert result.success? + assert_nil result.classifications + end + + def test_runner_classifier_error_does_not_abort_eval + rig = setup_otel_test_rig + + context = Braintrust::Eval::Context.build( + task: ->(input:) { input }, + scorers: [Braintrust::Scorer.new("always_one") { 1.0 }], + classifiers: [ + Braintrust::Classifier.new("broken") { |**| raise "classifier boom" } + ], + cases: [{input: "hello"}], + experiment_id: "exp-123", + state: rig.state, + tracer_provider: rig.tracer_provider + ) + + result = Braintrust::Eval::Runner.new(context).run + # Eval continues running task and scorers, but classifier errors are surfaced. + refute result.success? + assert_equal 1, result.errors.length + assert_match(/Classifier 'broken' failed for input 'hello': classifier boom/, result.errors.first) + assert_equal [1.0], result.scores["always_one"] + assert_nil result.classifications + end + + def test_runner_classifier_error_does_not_affect_other_classifiers + rig = setup_otel_test_rig + + context = Braintrust::Eval::Context.build( + task: ->(input:) { input }, + scorers: [], + classifiers: [ + Braintrust::Classifier.new("broken") { |**| raise "boom" }, + Braintrust::Classifier.new("working") { |**| {name: "working", id: "ok"} } + ], + cases: [{input: "hello"}], + experiment_id: "exp-123", + state: rig.state, + tracer_provider: rig.tracer_provider + ) + + result = Braintrust::Eval::Runner.new(context).run + refute result.success? 
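+    # One failing classifier marks the run unsuccessful, but the assertions
+    # below show the working classifier still produced results.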
+ assert_equal 1, result.errors.length + assert_match(/Classifier 'broken' failed for input 'hello': boom/, result.errors.first) + assert_equal({"working" => [{id: "ok"}]}, result.classifications) + end + + def test_runner_classifier_error_logged_to_span_metadata + rig = setup_otel_test_rig + + context = Braintrust::Eval::Context.build( + task: ->(input:) { input }, + scorers: [], + classifiers: [ + Braintrust::Classifier.new("broken") { |**| raise "classifier boom" } + ], + cases: [{input: "hello"}], + experiment_id: "exp-123", + state: rig.state, + tracer_provider: rig.tracer_provider + ) + + Braintrust::Eval::Runner.new(context).run + spans = rig.drain + + eval_span = spans.find { |s| s.name == "eval" } + refute_nil eval_span + metadata = JSON.parse(eval_span.attributes["braintrust.metadata"] || "{}") + assert_equal "classifier boom", metadata.dig("classifier_errors", "broken") + end + + def test_runner_classifier_span_attributes + rig = setup_otel_test_rig + + context = Braintrust::Eval::Context.build( + task: ->(input:) { input }, + scorers: [], + classifiers: [ + Braintrust::Classifier.new("my_classifier") { |**| {name: "my_classifier", id: "foo"} } + ], + cases: [{input: "hello"}], + experiment_id: "exp-123", + state: rig.state, + tracer_provider: rig.tracer_provider + ) + + Braintrust::Eval::Runner.new(context).run + spans = rig.drain + + classifier_span = spans.find { |s| s.name == "my_classifier" } + refute_nil classifier_span + span_attrs = JSON.parse(classifier_span.attributes["braintrust.span_attributes"]) + assert_equal "classifier", span_attrs["type"] + assert_equal "scorer", span_attrs["purpose"] + assert_equal "my_classifier", span_attrs["name"] + end + + def test_runner_classifier_multi_label_result + rig = setup_otel_test_rig + + context = Braintrust::Eval::Context.build( + task: ->(input:) { input }, + scorers: [], + classifiers: [ + Braintrust::Classifier.new("sentiment") do |**| + [ + {name: "sentiment", id: "positive", label: "Positive"}, + {name: "sentiment", id: "enthusiastic", label: "Enthusiastic"} + ] + end + ], + cases: [{input: "great!"}], + experiment_id: "exp-123", + state: rig.state, + tracer_provider: rig.tracer_provider + ) + + result = Braintrust::Eval::Runner.new(context).run + assert result.success? + sentiment = result.classifications["sentiment"] + assert_equal 2, sentiment.length + assert_equal({id: "positive", label: "Positive"}, sentiment[0]) + assert_equal({id: "enthusiastic", label: "Enthusiastic"}, sentiment[1]) + end + + def test_runner_classifier_name_defaults_to_function_name + rig = setup_otel_test_rig + + context = Braintrust::Eval::Context.build( + task: ->(input:) { input }, + scorers: [], + classifiers: [ + Braintrust::Classifier.new("my_classifier") { |**| {id: "foo"} } # no :name in result + ], + cases: [{input: "hello"}], + experiment_id: "exp-123", + state: rig.state, + tracer_provider: rig.tracer_provider + ) + + result = Braintrust::Eval::Runner.new(context).run + assert result.success? 
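+    # The block returned {id: "foo"} with no :name, so the result is keyed
+    # by the classifier's own name.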
+ assert result.classifications.key?("my_classifier") + end + + def test_runner_classifications_logged_to_eval_span + rig = setup_otel_test_rig + + context = Braintrust::Eval::Context.build( + task: ->(input:) { input }, + scorers: [], + classifiers: [ + Braintrust::Classifier.new("category") { |**| {name: "category", id: "greeting"} } + ], + cases: [{input: "hello"}], + experiment_id: "exp-123", + state: rig.state, + tracer_provider: rig.tracer_provider + ) + + Braintrust::Eval::Runner.new(context).run + spans = rig.drain + + eval_span = spans.find { |s| s.name == "eval" } + refute_nil eval_span + raw = eval_span.attributes["braintrust.classifications"] + refute_nil raw + classifications = JSON.parse(raw) + assert_equal [{"id" => "greeting"}], classifications["category"] + end + + def test_runner_classifications_nil_when_no_classifiers + rig = setup_otel_test_rig + + context = Braintrust::Eval::Context.build( + task: ->(input:) { input.upcase }, + scorers: [Braintrust::Scorer.new("exact") { 1.0 }], + cases: [{input: "hello"}], + experiment_id: "exp-123", + state: rig.state, + tracer_provider: rig.tracer_provider + ) + + result = Braintrust::Eval::Runner.new(context).run + assert result.success? + assert_nil result.classifications + end + + def test_runner_multiple_cases_accumulate_classifications + rig = setup_otel_test_rig + + context = Braintrust::Eval::Context.build( + task: ->(input:) { input }, + scorers: [], + classifiers: [ + Braintrust::Classifier.new("category") { |input:| {name: "category", id: (input.length > 3) ? "long" : "short"} } + ], + cases: [{input: "hi"}, {input: "hello"}, {input: "ok"}], + experiment_id: "exp-123", + state: rig.state, + tracer_provider: rig.tracer_provider + ) + + result = Braintrust::Eval::Runner.new(context).run + assert result.success? + category = result.classifications["category"] + assert_equal 3, category.length + end +end diff --git a/test/braintrust/eval_test.rb b/test/braintrust/eval_test.rb index bef2a2cf..6a11d008 100644 --- a/test/braintrust/eval_test.rb +++ b/test/braintrust/eval_test.rb @@ -1384,4 +1384,52 @@ def test_eval_run_with_parameters end end end + + # ============================================ + # Classifier validation tests + # ============================================ + + def test_eval_run_requires_at_least_scorers_or_classifiers + error = assert_raises(ArgumentError) do + Braintrust::Eval.run( + cases: [{input: "hello"}], + task: ->(input:) { input } + ) + end + assert_match(/at least one of scorers or classifiers is required/i, error.message) + end + + def test_eval_run_requires_at_least_scorers_or_classifiers_when_empty_arrays + error = assert_raises(ArgumentError) do + Braintrust::Eval.run( + cases: [{input: "hello"}], + task: ->(input:) { input }, + scorers: [], + classifiers: [] + ) + end + assert_match(/at least one of scorers or classifiers is required/i, error.message) + end + + def test_eval_run_with_classifiers_only_no_scorers + rig = setup_otel_test_rig + + result = run_test_eval( + experiment_id: "exp-123", + experiment_name: "classifier-only", + project_id: "proj-456", + project_name: "test-project", + cases: [{input: "hello"}], + task: ->(input:) { input }, + classifiers: [ + Braintrust::Classifier.new("category") { |**| {name: "category", id: "greeting"} } + ], + state: rig.state, + tracer_provider: rig.tracer_provider + ) + + assert result.success? 
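+    # No scorers were configured, so scores is empty while classifications populate.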
+ assert_equal({}, result.scores) + assert_equal({"category" => [{id: "greeting"}]}, result.classifications) + end end diff --git a/test/braintrust/server/handlers/eval_test.rb b/test/braintrust/server/handlers/eval_test.rb index dce8a868..884d5199 100644 --- a/test/braintrust/server/handlers/eval_test.rb +++ b/test/braintrust/server/handlers/eval_test.rb @@ -59,7 +59,7 @@ def test_returns_400_for_multiple_data_sources # --- SSE streaming --- def test_returns_200_with_sse_content_type - @evaluators["test-eval"] = test_evaluator(task: ->(input:) { input }) + @evaluators["test-eval"] = test_evaluator(task: ->(input:) { input }, scorers: [noop_scorer]) status, headers, _ = handler.call(rack_json_env( {name: "test-eval", data: {data: [{input: "hello"}]}, experiment_name: "exp"}, @@ -73,7 +73,7 @@ def test_returns_200_with_sse_content_type end def test_streams_progress_event_per_case - @evaluators["upcase-eval"] = test_evaluator(task: ->(input:) { input.to_s.upcase }) + @evaluators["upcase-eval"] = test_evaluator(task: ->(input:) { input.to_s.upcase }, scorers: [noop_scorer]) _, _, body = handler.call(rack_json_env( {name: "upcase-eval", data: {data: [{input: "a"}, {input: "b"}, {input: "c"}]}, experiment_name: "exp"}, @@ -88,7 +88,7 @@ def test_streams_progress_event_per_case end def test_progress_event_contains_protocol_fields - @evaluators["upcase-eval"] = test_evaluator(task: ->(input:) { input.to_s.upcase }) + @evaluators["upcase-eval"] = test_evaluator(task: ->(input:) { input.to_s.upcase }, scorers: [noop_scorer]) _, _, body = handler.call(rack_json_env( {name: "upcase-eval", data: {data: [{input: "hello"}]}, experiment_name: "exp"}, @@ -108,7 +108,7 @@ def test_progress_event_contains_protocol_fields end def test_progress_event_contains_task_output_as_json_string - @evaluators["upcase-eval"] = test_evaluator(task: ->(input:) { input.to_s.upcase }) + @evaluators["upcase-eval"] = test_evaluator(task: ->(input:) { input.to_s.upcase }, scorers: [noop_scorer]) _, _, body = handler.call(rack_json_env( {name: "upcase-eval", data: {data: [{input: "hello"}]}, experiment_name: "exp"}, @@ -165,7 +165,7 @@ def test_summary_event_contains_scores_and_experiment_name end def test_stream_ends_with_done - @evaluators["test-eval"] = test_evaluator(task: ->(input:) { input }) + @evaluators["test-eval"] = test_evaluator(task: ->(input:) { input }, scorers: [noop_scorer]) _, _, body = handler.call(rack_json_env( {name: "test-eval", data: {data: [{input: "x"}]}, experiment_name: "exp"}, @@ -177,7 +177,7 @@ def test_stream_ends_with_done end def test_task_error_still_emits_progress_and_done - @evaluators["failing-eval"] = test_evaluator(task: -> { raise "boom" }) + @evaluators["failing-eval"] = test_evaluator(task: -> { raise "boom" }, scorers: [noop_scorer]) _, _, body = handler.call(rack_json_env( {name: "failing-eval", data: {data: [{input: "x"}]}, experiment_name: "exp"}, @@ -191,7 +191,7 @@ def test_task_error_still_emits_progress_and_done end def test_task_error_progress_contains_error_event - @evaluators["failing-eval"] = test_evaluator(task: -> { raise "task exploded" }) + @evaluators["failing-eval"] = test_evaluator(task: -> { raise "task exploded" }, scorers: [noop_scorer]) _, _, body = handler.call(rack_json_env( {name: "failing-eval", data: {data: [{input: "x"}]}, experiment_name: "exp"}, @@ -239,7 +239,7 @@ def test_accepts_dataset_id_as_sole_data_source # --- Auth passthrough --- def test_build_state_returns_nil_without_auth - @evaluators["test-eval"] = test_evaluator(task: ->(input:) { input }) 
+ @evaluators["test-eval"] = test_evaluator(task: ->(input:) { input }, scorers: [noop_scorer]) env = rack_json_env( {name: "test-eval", data: {data: [{input: "hello"}]}}, @@ -252,7 +252,7 @@ def test_build_state_returns_nil_without_auth end def test_build_state_returns_nil_for_non_hash_auth - @evaluators["test-eval"] = test_evaluator(task: ->(input:) { input }) + @evaluators["test-eval"] = test_evaluator(task: ->(input:) { input }, scorers: [noop_scorer]) env = rack_json_env( {name: "test-eval", data: {data: [{input: "hello"}]}}, @@ -442,7 +442,7 @@ def test_handler_resolves_scores_to_scorer_ids # --- Server-specific body selection --- def test_returns_sse_body_without_protocol_http_request - @evaluators["test-eval"] = test_evaluator(task: ->(input:) { input }) + @evaluators["test-eval"] = test_evaluator(task: ->(input:) { input }, scorers: [noop_scorer]) _, _, body = handler.call(rack_json_env( {name: "test-eval", data: {data: [{input: "x"}]}, experiment_name: "exp"}, @@ -453,7 +453,7 @@ def test_returns_sse_body_without_protocol_http_request end def test_returns_sse_stream_body_with_protocol_http_request - @evaluators["test-eval"] = test_evaluator(task: ->(input:) { input }) + @evaluators["test-eval"] = test_evaluator(task: ->(input:) { input }, scorers: [noop_scorer]) env = rack_json_env( {name: "test-eval", data: {data: [{input: "x"}]}, experiment_name: "exp"}, @@ -470,7 +470,7 @@ def test_returns_sse_stream_body_with_protocol_http_request # --- Parent passthrough --- def test_handler_passes_parent_through - @evaluators["test-eval"] = test_evaluator(task: ->(input:) { input }) + @evaluators["test-eval"] = test_evaluator(task: ->(input:) { input }, scorers: [noop_scorer]) _, _, body = handler.call(rack_json_env( { @@ -494,6 +494,10 @@ def test_evaluator(**kwargs) Test::Support::EvalHelper::TestEvaluator.new(tracer_provider: @rig.tracer_provider, **kwargs) end + def noop_scorer + Braintrust::Scorer.new("noop") { 1.0 } + end + def handler Braintrust::Server::Handlers::Eval.new(@evaluators) end diff --git a/test/braintrust/server/rack/eval_endpoint_test.rb b/test/braintrust/server/rack/eval_endpoint_test.rb index a443e066..8e943ffe 100644 --- a/test/braintrust/server/rack/eval_endpoint_test.rb +++ b/test/braintrust/server/rack/eval_endpoint_test.rb @@ -21,7 +21,7 @@ def app end def test_streams_sse_events_for_inline_data - @evaluators["upcase-eval"] = test_evaluator(task: ->(input:) { input.to_s.upcase }) + @evaluators["upcase-eval"] = test_evaluator(task: ->(input:) { input.to_s.upcase }, scorers: [noop_scorer]) post_json "/eval", { name: "upcase-eval", @@ -52,7 +52,7 @@ def test_streams_sse_events_for_inline_data end def test_progress_events_contain_output - @evaluators["upcase-eval"] = test_evaluator(task: ->(input:) { input.to_s.upcase }) + @evaluators["upcase-eval"] = test_evaluator(task: ->(input:) { input.to_s.upcase }, scorers: [noop_scorer]) post_json "/eval", { name: "upcase-eval", @@ -94,7 +94,8 @@ def test_summary_event_contains_scores def test_error_still_emits_progress_and_done @evaluators["failing-eval"] = test_evaluator( - task: -> { raise "task exploded" } + task: -> { raise "task exploded" }, + scorers: [noop_scorer] ) post_json "/eval", { @@ -167,7 +168,8 @@ def test_parameters_forwarded_to_task task: ->(input:, parameters:) { prefix = parameters["greeting"] || "hey" "#{prefix} #{input}" - } + }, + scorers: [noop_scorer] ) post_json "/eval", { @@ -200,6 +202,10 @@ def test_evaluator(**kwargs) Test::Support::EvalHelper::TestEvaluator.new(tracer_provider: 
@rig.tracer_provider, **kwargs) end + def noop_scorer + Braintrust::Scorer.new("noop") { 1.0 } + end + def post_json(path, body) post path, JSON.generate(body), {"CONTENT_TYPE" => "application/json"} end diff --git a/test/braintrust/server/services/eval_service_test.rb b/test/braintrust/server/services/eval_service_test.rb index 5320d5f7..4f5a32d1 100644 --- a/test/braintrust/server/services/eval_service_test.rb +++ b/test/braintrust/server/services/eval_service_test.rb @@ -92,7 +92,7 @@ def test_validate_accepts_dataset_name # --- stream --- def test_stream_emits_progress_and_done_events - @evaluators["upcase-eval"] = test_evaluator(task: ->(input) { input.to_s.upcase }) + @evaluators["upcase-eval"] = test_evaluator(task: ->(input) { input.to_s.upcase }, scorers: [noop_scorer]) s = service validated = s.validate({ "name" => "upcase-eval", @@ -129,7 +129,7 @@ def test_stream_emits_summary_with_scores end def test_stream_emits_error_progress_on_task_failure - @evaluators["failing-eval"] = test_evaluator(task: ->(_input) { raise "boom" }) + @evaluators["failing-eval"] = test_evaluator(task: ->(_input) { raise "boom" }, scorers: [noop_scorer]) s = service validated = s.validate({ "name" => "failing-eval", @@ -330,6 +330,10 @@ def test_evaluator(**kwargs) Test::Support::EvalHelper::TestEvaluator.new(tracer_provider: @rig.tracer_provider, **kwargs) end + def noop_scorer + Braintrust::Scorer.new("noop") { 1.0 } + end + def collect_streamed_events(svc, validated, auth: nil) chunks = [] sse = Braintrust::Server::SSEWriter.new { |chunk| chunks << chunk } diff --git a/test/support/braintrust_helper.rb b/test/support/braintrust_helper.rb index 49d57eb1..8f355ee1 100644 --- a/test/support/braintrust_helper.rb +++ b/test/support/braintrust_helper.rb @@ -75,11 +75,13 @@ def get_integration_test_api(**options) # Helper to run eval internally without API calls for testing # @param state [State] Braintrust state - def run_test_eval(experiment_id:, experiment_name:, project_id:, project_name:, - cases:, task:, scorers:, state:, parallelism: 1, tracer_provider: nil) + def run_test_eval(experiment_id:, experiment_name:, project_id:, + project_name:, cases:, task:, state:, scorers: [], classifiers: [], + parallelism: 1, tracer_provider: nil) context = Braintrust::Eval::Context.build( task: task, scorers: scorers, + classifiers: classifiers, cases: cases, experiment_id: experiment_id, experiment_name: experiment_name,