bufbuild · pkwarren · Apr 23, 2026 · Apr 23, 2026 · Apr 23, 2026 · Apr 23, 2026
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -0,0 +1,121 @@
+# Benchmarks
+
+JMH microbenchmarks for protovalidate-java.
+Used locally to quantify performance changes.
+Not executed in CI; `./gradlew build` only verifies that benchmark code compiles.
+
+## Prerequisites
+
+- JDK 21
+- `buf` CLI (installed automatically by Gradle)
+- `bash`, `jq`, and `column` for `jmhCompare` (preinstalled on macOS)
+
+## Running benchmarks
+
+Run all benchmarks:
+
+```
+./gradlew :benchmarks:jmh
+```
+
+Filter to a subset via `-Pbench` (a regex that JMH searches for anywhere in the fully qualified benchmark name):
+
+```
+./gradlew :benchmarks:jmh -Pbench=validateSimple     # one method
+./gradlew :benchmarks:jmh -Pbench='compile.*'        # prefix match
+./gradlew :benchmarks:jmh -Pbench='\.validate.*'     # all steady-state (\. skips the protovalidate package)
+```
+
+Results land in `build/results/jmh/results.json` (relative to the `benchmarks/` directory).
+
+## Comparing before and after a change
+
+Typical A/B workflow:
+
+```
+# 1. run baseline on the current tree and save it
+./gradlew :benchmarks:jmh -Pbench='compile.*' :benchmarks:jmhSaveBaseline
+
+# 2. apply your change (edit code, or gh pr checkout <N>)
+
+# 3. re-run and diff against the saved baseline
+./gradlew :benchmarks:jmh -Pbench='compile.*' :benchmarks:jmhCompare
+```
+
+Output:
+
+```
+benchmark                    metric  before            after             delta
+compileValidatorForRepeated  time    4696209.43 ns/op  1064942.21 ns/op  -77.3%
+compileValidatorForRepeated  alloc   12950196.95 B/op  3262651.61 B/op   -74.8%
+```
+
+`jmhSaveBaseline` copies the current `results.json` to `results-before.json`.
+`jmhCompare` diffs `results-before.json` against `results.json` by default.
+Pass explicit paths with `-Pbefore=<path> -Pafter=<path>`.
+
+## Adding a new benchmark
+
+Benchmarks live in `src/jmh/java/...` and target proto messages in `src/jmh/proto/...`.
+
+### 1. Define (or reuse) a proto message
+
+Edit `src/jmh/proto/bench/v1/bench.proto` to add a message that exercises the code path you want to measure.
+`buf generate` runs automatically before `compileJmhJava`, so no separate codegen step is needed.
+
+### 2. Add a `@Benchmark` method
+
+Edit `src/jmh/java/build/buf/protovalidate/benchmarks/ValidationBenchmark.java`.
+Put one-time state (validator, messages) in `@Setup` and the measured work in the `@Benchmark` method.
+
+Steady-state (hot-path) pattern:
+
+```java
+@Benchmark
+public void validateMyMessage(Blackhole bh) throws ValidationException {
+  bh.consume(validator.validate(myMessage));
+}
+```
+
+Cold/compile-path pattern (each iteration builds a fresh validator):
+
+```java
+@Benchmark
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+public void compileValidatorForMyMessage(Blackhole bh) throws CompilationException {
+  Validator v = ValidatorFactory.newBuilder()
+      .buildWithDescriptors(Collections.singletonList(MyMessage.getDescriptor()), false);
+  bh.consume(v);
+}
+```
+
+Choose based on what the change you want to measure actually touches.
+`EvaluatorBuilder` caches compiled evaluators per descriptor, so after the first `validate()` call, further calls skip compilation.
+If your fix is in the compile path (e.g. `RuleCache`, `DescriptorCacheBuilder`), a steady-state benchmark will not show the effect because `@Setup` absorbs it.
+
+## Configuration
+
+`build.gradle.kts` holds the JMH plugin config.
+Defaults are tuned for fast local iteration (~30s per benchmark):
+
+- 3 warmup iterations of 2s each
+- 5 measurement iterations of 2s each
+- 2 forks
+- Average-time mode, nanoseconds
+- GC profiler on (`gc.alloc.rate.norm` for per-op allocations)
+
+For higher-confidence numbers (tighter confidence intervals, useful for deltas under ~10%), bump `fork`, `warmup`, and `timeOnIteration` in the `jmh {}` block.
+Expect ~5 min per benchmark at `fork=5, warmup=5s, timeOnIteration=5s`.
+
+## Metrics
+
+Each benchmark emits:
+
+- **Primary:** average time per `@Benchmark` invocation (`ns/op` by default).
+- **Secondary (GC profiler):**
+  - `gc.alloc.rate.norm` - bytes allocated per op; deterministic, used by `jmhCompare`.
+  - `gc.alloc.rate` - allocation rate in MB/sec; varies with CPU.
+  - `gc.count` / `gc.time` - GC activity during the run.
+
+For allocation flame graphs, uncomment the `async` profiler line in `build.gradle.kts`.
+Requires `async-profiler` installed locally.
diff --git a/benchmarks/buf.gen.yaml b/benchmarks/buf.gen.yaml
@@ -0,0 +1,6 @@
+version: v2
+plugins:
+  - remote: buf.build/protocolbuffers/java:$protocJavaPluginVersion
+    out: build/generated/sources/bufgen
+inputs:
+  - directory: src/jmh/proto
diff --git a/benchmarks/buf.lock b/benchmarks/buf.lock
@@ -0,0 +1,6 @@
+# Generated by buf. DO NOT EDIT.
+version: v2
+deps:
+  - name: buf.build/bufbuild/protovalidate
+    commit: 50325440f8f24053b047484a6bf60b76
+    digest: b5:74cb6f5c0853c3c10aafc701614194bbd63326bdb8ef4068214454b8894b03ba4113e04b3a33a8321cdf05336e37db4dc14a5e2495db8462566914f36086ba31
diff --git a/benchmarks/buf.yaml b/benchmarks/buf.yaml
@@ -0,0 +1,5 @@
+version: v2
+modules:
+  - path: src/jmh/proto
+deps:
+  - buf.build/bufbuild/protovalidate
diff --git a/benchmarks/build.gradle.kts b/benchmarks/build.gradle.kts
@@ -0,0 +1,137 @@
+plugins {
+    java
+    alias(libs.plugins.jmh)
+    alias(libs.plugins.osdetector)
+}
+
+// JMH can use modern bytecode; benchmarks aren't shipped.
+java {
+    sourceCompatibility = JavaVersion.VERSION_21
+    targetCompatibility = JavaVersion.VERSION_21
+}
+
+val buf: Configuration by configurations.creating
+
+tasks.register("configureBuf") {
+    description = "Installs the Buf CLI."
+    File(buf.asPath).setExecutable(true)
+}
+
+tasks.register<Copy>("filterBufGenYaml") {
+    from(files("buf.gen.yaml"))
+    includeEmptyDirs = false
+    into(layout.buildDirectory.dir("buf-gen-templates"))
+    expand("protocJavaPluginVersion" to "v${libs.versions.protobuf.get().substringAfter('.')}")
+    filteringCharset = "UTF-8"
+}
+
+tasks.register<Exec>("generateBenchmarkSources") {
+    dependsOn("configureBuf", "filterBufGenYaml")
+    description = "Generates Java sources for benchmark protos via buf generate."
+    val template = layout.buildDirectory.file("buf-gen-templates/buf.gen.yaml")
+    inputs.files(buf)
+    inputs.dir("src/jmh/proto")
+    // Track buf.lock too: it pins the protovalidate dep, so a bump must re-run codegen.
+    inputs.files("buf.yaml", "buf.lock", template)
+    outputs.dir(layout.buildDirectory.dir("generated/sources/bufgen"))
+    commandLine(buf.asPath, "generate", "--template", template.get().asFile.absolutePath)
+}
+
+sourceSets {
+    named("jmh") {
+        java {
+            srcDir(layout.buildDirectory.dir("generated/sources/bufgen"))
+        }
+    }
+}
+
+tasks.matching { it.name == "compileJmhJava" }.configureEach {
+    dependsOn("generateBenchmarkSources")
+}
+
+// Ensure `./gradlew build` (and `make build`) compiles the JMH sources so CI
+// catches breakages in benchmark code. Execution remains gated behind the
+// explicit `:benchmarks:jmh` task.
+tasks.named("build") {
+    dependsOn("compileJmhJava")
+}
+
+dependencies {
+    jmhImplementation(project(":"))
+    jmhImplementation(libs.protobuf.java)
+    buf("build.buf:buf:${libs.versions.buf.get()}:${osdetector.classifier}@exe")
+}
+
+// Benchmarks produce fresh timing data each run; disable Gradle's up-to-date
+// check so the task always executes (otherwise -Pbench changes are ignored).
+tasks.named("jmh") {
+    outputs.upToDateWhen { false }
+}
+
+jmh {
+    // Defaults tuned for fast local A/B runs: (3 + 5) iterations x 2s x 2 forks, ~30s per benchmark.
+    // For higher-confidence numbers bump iteration time and fork count.
+    warmupIterations.set(3)
+    warmup.set("2s")
+    iterations.set(5)
+    timeOnIteration.set("2s")
+    fork.set(2)
+    timeUnit.set("ns")
+    benchmarkMode.set(listOf("avgt"))
+    resultFormat.set("JSON")
+    // GC profiler reports bytes allocated per op (gc.alloc.rate.norm), which
+    // jmhCompare can diff alongside timing. ~5-10% overhead on timings.
+    profilers.set(listOf("gc"))
+    // For allocation flame graphs (requires async-profiler installed locally):
+    // profilers.set(listOf("async:event=alloc;output=flamegraph;dir=build/reports/jmh/async"))
+
+    // Optional filter via `-Pbench=<regex>`; when the property is absent no include is set. Example:
+    //   ./gradlew :benchmarks:jmh -Pbench=validateSimple
+    //   ./gradlew :benchmarks:jmh -Pbench='compile.*'
+    project.findProperty("bench")?.toString()?.let {
+        includes.set(listOf(it))
+    }
+}
+
+val jmhResults = layout.buildDirectory.file("results/jmh/results.json")
+val jmhBaseline = layout.buildDirectory.file("results/jmh/results-before.json")
+
+// Snapshots the latest results.json as results-before.json, the default baseline for jmhCompare.
+//
+// Usage:
+//   ./gradlew :benchmarks:jmh :benchmarks:jmhSaveBaseline
+//   # apply change...
+//   ./gradlew :benchmarks:jmh :benchmarks:jmhCompare
+tasks.register<Copy>("jmhSaveBaseline") {
+    description = "Copies the latest JMH results.json to results-before.json as the baseline."
+    mustRunAfter("jmh") // ordering only; does not trigger a benchmark run
+    from(jmhResults)
+    rename { "results-before.json" }
+    into(jmhBaseline.get().asFile.parentFile)
+}
+
+// Diffs two JMH results.json files as a concise benchstat-style table.
+// Defaults: results-before.json (written by jmhSaveBaseline) vs. the latest results.json.
+//
+// Override paths:
+//   ./gradlew :benchmarks:jmhCompare -Pbefore=a.json -Pafter=b.json
+tasks.register<Exec>("jmhCompare") {
+    description = "Diffs two JMH result JSON files as a concise table."
+    // Same ordering rule as jmhSaveBaseline: never diff before a requested jmh run finishes.
+    mustRunAfter("jmh")
+    val before =
+        project.findProperty("before")?.toString() ?: jmhBaseline.get().asFile.absolutePath
+    val after =
+        project.findProperty("after")?.toString() ?: jmhResults.get().asFile.absolutePath
+    val jqScript = file("jmh-compare.jq").absolutePath
+    commandLine(
+        "bash",
+        "-c",
+        // pipefail: without it the pipeline reports column's status and hides jq failures.
+        "set -o pipefail; jq --slurp --raw-output --from-file \"\$1\" \"\$2\" \"\$3\" | column -t -s \$'\\t'",
+        "jmh-compare", // $0
+        jqScript, // $1
+        before, // $2
+        after, // $3
+    )
+}
diff --git a/benchmarks/jmh-compare.jq b/benchmarks/jmh-compare.jq
@@ -0,0 +1,24 @@
+def pct(a; b):
+  if a == null or b == null or b == 0 then "~"
+  else (((a - b) / b * 100) * 10 | round / 10) as $d
+    | if $d > 0 then "+\($d)%" elif $d == 0 then "~" else "\($d)%" end
+  end;
+def num(x):
+  if x == null then "-"
+  else (x * 100 | round / 100 | tostring)
+  end;
+
+def extract: map({
+  key: (.benchmark | split(".") | last),
+  time: .primaryMetric.score, time_unit: .primaryMetric.scoreUnit,
+  # Older JMH releases prefix profiler metrics with "·"; newer ones drop it. Accept both.
+  alloc: ((.secondaryMetrics["·gc.alloc.rate.norm"] // .secondaryMetrics["gc.alloc.rate.norm"]).score // null)
+});
+
+(.[0] | extract) as $b
+| (.[1] | extract) as $a
+| (["benchmark", "metric", "before", "after", "delta"] | @tsv),
+  ($b[] | . as $bi
+        | ($a[] | select(.key == $bi.key)) as $ai
+        | ([$bi.key, "time",  "\(num($bi.time)) \($bi.time_unit)", "\(num($ai.time)) \($ai.time_unit)", pct($ai.time; $bi.time)] | @tsv),
+          ([$bi.key, "alloc", "\(num($bi.alloc)) B/op", "\(num($ai.alloc)) B/op", pct($ai.alloc; $bi.alloc)] | @tsv))