From 6224d4799dd0d0065ec55f054e62ef6f7a2dabf5 Mon Sep 17 00:00:00 2001 From: Mark Phelps Date: Fri, 5 Jun 2026 09:25:28 -0400 Subject: [PATCH 1/3] test: add fuzzing for input type resolution and OpenAPI generation Add fuzz coverage for the schema-gen codepaths exercised by union inputs, which previously had no fuzzing and no validity oracle: - FuzzResolveInputType: feeds arbitrary TypeAnnotation trees through ResolveInputType, then validates that any successfully resolved input type generates an OpenAPI document the build-time validator accepts. - FuzzInputTypeJSONSchema: builds arbitrary InputType trees directly (reaching shapes the resolver never produces) and validates the generated OpenAPI document. Both use an assertValidOpenAPI oracle (the same kin-openapi validator as writeAndValidateSchema), so a union shape that resolves cleanly but emits an invalid schema (e.g. an unsupported `type: null` branch) fails the fuzzer rather than surfacing as a confusing user build error. Make the test:fuzz task auto-discover every Fuzz* target via `go test -list` instead of hardcoding names, so new fuzz tests are picked up automatically. This also surfaced targets the hardcoded list missed (pkg/config's three targets and two parser helpers): 11 total vs the 4 previously run. Manage jq (used for discovery) as a mise tool and simplify the CI fuzz-go job to call the task. 
--- .github/workflows/ci.yaml | 14 +- mise.lock | 39 +++++ mise.toml | 41 +++-- pkg/schema/input_type_fuzz_test.go | 246 +++++++++++++++++++++++++++++ 4 files changed, 318 insertions(+), 22 deletions(-) create mode 100644 pkg/schema/input_type_fuzz_test.go diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d3f82eadb0..025dfebda0 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -440,7 +440,9 @@ jobs: fuzz-go: name: Fuzz Go runs-on: ubuntu-latest - timeout-minutes: 10 + # test:fuzz auto-discovers every Fuzz* target and runs each for FUZZTIME + # (30s default), so this scales as targets are added. + timeout-minutes: 15 env: CGO_ENABLED: "1" steps: @@ -449,14 +451,8 @@ jobs: with: version: 2026.4.27 cache_key_prefix: mise-ci-${{ github.job }} - - name: Fuzz schema type resolution - run: go test ./pkg/schema/ -run='^$' -fuzz=FuzzResolveSchemaType -fuzztime=30s - - name: Fuzz JSON schema generation - run: go test ./pkg/schema/ -run='^$' -fuzz=FuzzJSONSchema -fuzztime=30s - - name: Fuzz Python parser - run: go test ./pkg/schema/python/ -run='^$' -fuzz=FuzzParsePredictor -fuzztime=30s - - name: Fuzz type annotation parsing - run: go test ./pkg/schema/python/ -run='^$' -fuzz=FuzzParseTypeAnnotation -fuzztime=30s + - name: Fuzz all targets (auto-discovered) + run: mise run test:fuzz test-rust: name: Test Rust diff --git a/mise.lock b/mise.lock index 5c6661efd8..3fe3fb9ec2 100644 --- a/mise.lock +++ b/mise.lock @@ -103,6 +103,45 @@ url = "https://github.com/gotestyourself/gotestsum/releases/download/v1.13.0/got checksum = "sha256:fd5a6dc69e46a0970593e70d85a7e75f16714e9c61d6d72ccc324eb82df5bb8a" url = "https://github.com/gotestyourself/gotestsum/releases/download/v1.13.0/gotestsum_1.13.0_windows_amd64.tar.gz" +[[tools."aqua:jqlang/jq"]] +version = "1.8.1" +backend = "aqua:jqlang/jq" + +[tools."aqua:jqlang/jq"."platforms.linux-arm64"] +checksum = "sha256:6bc62f25981328edd3cfcfe6fe51b073f2d7e7710d7ef7fcdac28d4e384fc3d4" +url = 
"https://github.com/jqlang/jq/releases/download/jq-1.8.1/jq-linux-arm64" +provenance = "github-attestations" + +[tools."aqua:jqlang/jq"."platforms.linux-arm64-musl"] +checksum = "sha256:6bc62f25981328edd3cfcfe6fe51b073f2d7e7710d7ef7fcdac28d4e384fc3d4" +url = "https://github.com/jqlang/jq/releases/download/jq-1.8.1/jq-linux-arm64" +provenance = "github-attestations" + +[tools."aqua:jqlang/jq"."platforms.linux-x64"] +checksum = "sha256:020468de7539ce70ef1bceaf7cde2e8c4f2ca6c3afb84642aabc5c97d9fc2a0d" +url = "https://github.com/jqlang/jq/releases/download/jq-1.8.1/jq-linux-amd64" +provenance = "github-attestations" + +[tools."aqua:jqlang/jq"."platforms.linux-x64-musl"] +checksum = "sha256:020468de7539ce70ef1bceaf7cde2e8c4f2ca6c3afb84642aabc5c97d9fc2a0d" +url = "https://github.com/jqlang/jq/releases/download/jq-1.8.1/jq-linux-amd64" +provenance = "github-attestations" + +[tools."aqua:jqlang/jq"."platforms.macos-arm64"] +checksum = "sha256:a9fe3ea2f86dfc72f6728417521ec9067b343277152b114f4e98d8cb0e263603" +url = "https://github.com/jqlang/jq/releases/download/jq-1.8.1/jq-macos-arm64" +provenance = "github-attestations" + +[tools."aqua:jqlang/jq"."platforms.macos-x64"] +checksum = "sha256:e80dbe0d2a2597e3c11c404f03337b981d74b4a8504b70586c354b7697a7c27f" +url = "https://github.com/jqlang/jq/releases/download/jq-1.8.1/jq-macos-amd64" +provenance = "github-attestations" + +[tools."aqua:jqlang/jq"."platforms.windows-x64"] +checksum = "sha256:23cb60a1354eed6bcc8d9b9735e8c7b388cd1fdcb75726b93bc299ef22dd9334" +url = "https://github.com/jqlang/jq/releases/download/jq-1.8.1/jq-windows-amd64.exe" +provenance = "github-attestations" + [[tools."aqua:mitsuhiko/insta"]] version = "1.46.0" backend = "aqua:mitsuhiko/insta" diff --git a/mise.toml b/mise.toml index f7a6fe99ab..e25f278c84 100644 --- a/mise.toml +++ b/mise.toml @@ -53,6 +53,7 @@ ruff = "0.14.13" ty = "0.0.10" "npm:prettier" = "3.6.2" "npm:markdownlint-cli2" = "0.22.0" +"aqua:jqlang/jq" = "1.8.1" 
"go:golang.org/x/tools/cmd/goimports" = "latest" zig = "0.15.2" @@ -325,21 +326,35 @@ depends = ["build:coglet:wheel"] run = "nox -s coglet" [tasks."test:fuzz"] -description = "Run Go fuzz tests (FUZZTIME=30s per target by default)" -run = """ +description = "Run all Go fuzz tests (auto-discovered; FUZZTIME=30s per target by default)" +run = ''' #!/usr/bin/env bash -set -e +set -euo pipefail FUZZTIME="${FUZZTIME:-30s}" -echo "Fuzzing schema type resolution ($FUZZTIME)..." -go test ./pkg/schema/ -run='^$' -fuzz=FuzzResolveSchemaType -fuzztime="$FUZZTIME" -echo "Fuzzing JSON schema generation ($FUZZTIME)..." -go test ./pkg/schema/ -run='^$' -fuzz=FuzzJSONSchema -fuzztime="$FUZZTIME" -echo "Fuzzing Python parser ($FUZZTIME)..." -go test ./pkg/schema/python/ -run='^$' -fuzz=FuzzParsePredictor -fuzztime="$FUZZTIME" -echo "Fuzzing type annotation parsing ($FUZZTIME)..." -go test ./pkg/schema/python/ -run='^$' -fuzz=FuzzParseTypeAnnotation -fuzztime="$FUZZTIME" -echo "All fuzz targets passed." -""" + +# Auto-discover every Fuzz* target and the package it lives in, so new fuzz +# tests are picked up automatically without editing this task. go test only +# fuzzes one target per invocation, so we still loop and run them one at a time. +# jq emits "<package> <target>" per line; package and target names never +# contain spaces, so `read pkg target` splits them cleanly. +count=0 +while read -r pkg target; do + [ -z "$pkg" ] && continue + echo "Fuzzing $target in $pkg ($FUZZTIME)..." + go test "$pkg" -run="^$" -fuzz="^${target}$" -fuzztime="$FUZZTIME" + count=$((count + 1)) +done < <( + go test -list "^Fuzz" -json ./... 2>/dev/null \ + | jq -r 'select(.Action=="output" and (.Output|test("^Fuzz"))) | .Package + " " + (.Output | rtrimstr("\n"))' +) + +if [ "$count" -eq 0 ]; then + echo "No fuzz targets found." >&2 + exit 1 +fi + +echo "All $count fuzz targets passed." 
+''' [tasks."test:integration"] description = "Run integration tests (skips slow tests by default, set SHORT=0 for full suite)" diff --git a/pkg/schema/input_type_fuzz_test.go b/pkg/schema/input_type_fuzz_test.go new file mode 100644 index 0000000000..cf59da0b88 --- /dev/null +++ b/pkg/schema/input_type_fuzz_test.go @@ -0,0 +1,246 @@ +package schema + +import ( + "context" + "testing" + + "github.com/getkin/kin-openapi/openapi3" +) + +// FuzzResolveInputType builds arbitrary TypeAnnotation trees from fuzz input +// and verifies that ResolveInputType never panics. When resolution succeeds, +// it feeds the resulting InputType through OpenAPI generation and validates +// the emitted document with the same kin-openapi validator used at build time +// (writeAndValidateSchema). This is the key oracle: a union input type that +// resolves cleanly but emits an OpenAPI document the build-time validator +// rejects (e.g. an unsupported `type: null` branch) is a real bug, not just a +// panic. +func FuzzResolveInputType(f *testing.F) { + // Seed corpus — union and JSON-native input shapes, plus tricky cases. 
+ seeds := []TypeAnnotation{ + {Kind: TypeAnnotSimple, Name: "str"}, + {Kind: TypeAnnotSimple, Name: "int"}, + {Kind: TypeAnnotSimple, Name: "float"}, + {Kind: TypeAnnotSimple, Name: "bool"}, + {Kind: TypeAnnotSimple, Name: "dict"}, + {Kind: TypeAnnotSimple, Name: "Any"}, + // str | float + {Kind: TypeAnnotUnion, Args: []TypeAnnotation{ + {Kind: TypeAnnotSimple, Name: "str"}, + {Kind: TypeAnnotSimple, Name: "float"}, + }}, + // str | float | None + {Kind: TypeAnnotUnion, Args: []TypeAnnotation{ + {Kind: TypeAnnotSimple, Name: "str"}, + {Kind: TypeAnnotSimple, Name: "float"}, + {Kind: TypeAnnotSimple, Name: "None"}, + }}, + // str | None (single-variant collapse to nullable) + {Kind: TypeAnnotUnion, Args: []TypeAnnotation{ + {Kind: TypeAnnotSimple, Name: "str"}, + {Kind: TypeAnnotSimple, Name: "None"}, + }}, + // int | float + {Kind: TypeAnnotUnion, Args: []TypeAnnotation{ + {Kind: TypeAnnotSimple, Name: "int"}, + {Kind: TypeAnnotSimple, Name: "float"}, + }}, + // list[int] | list[float] + {Kind: TypeAnnotUnion, Args: []TypeAnnotation{ + {Kind: TypeAnnotGeneric, Name: "list", Args: []TypeAnnotation{ + {Kind: TypeAnnotSimple, Name: "int"}, + }}, + {Kind: TypeAnnotGeneric, Name: "list", Args: []TypeAnnotation{ + {Kind: TypeAnnotSimple, Name: "float"}, + }}, + }}, + // dict | list[dict] + {Kind: TypeAnnotUnion, Args: []TypeAnnotation{ + {Kind: TypeAnnotSimple, Name: "dict"}, + {Kind: TypeAnnotGeneric, Name: "list", Args: []TypeAnnotation{ + {Kind: TypeAnnotSimple, Name: "dict"}, + }}, + }}, + // Unsupported member: Path | str (must be rejected, not panic) + {Kind: TypeAnnotUnion, Args: []TypeAnnotation{ + {Kind: TypeAnnotSimple, Name: "Path"}, + {Kind: TypeAnnotSimple, Name: "str"}, + }}, + // Optional[str] + {Kind: TypeAnnotGeneric, Name: "Optional", Args: []TypeAnnotation{ + {Kind: TypeAnnotSimple, Name: "str"}, + }}, + } + for _, s := range seeds { + f.Add(encodeAnnotation(s)) + } + + ctx := NewImportContext() + typedDicts := map[string]bool{} + + f.Fuzz(func(t 
*testing.T, data []byte) { + ann, _ := decodeAnnotation(data, 0, 0) + + // Must not panic regardless of input. + it, ft, err := ResolveInputType(ann, ctx, typedDicts) + if err != nil { + return + } + + // A resolved input type must build a field that validates and emits a + // valid OpenAPI document. + field := InputField{ + Name: "value", + Order: 0, + FieldType: ft, + InputType: &it, + } + if err := ValidateInputField(field); err != nil { + return + } + + inputs := NewOrderedMap[string, InputField]() + inputs.Set("value", field) + out, err := GenerateOpenAPISchema(&PredictorInfo{ + Inputs: inputs, + Output: SchemaPrim(TypeString), + Mode: ModePredict, + }) + if err != nil { + return + } + + // Oracle: the generated schema must be a valid OpenAPI document, the + // same check writeAndValidateSchema performs at build time. A schema + // that fails here would surface as a confusing build failure for users. + assertValidOpenAPI(t, out) + }) +} + +// FuzzInputTypeJSONSchema constructs arbitrary InputType trees directly (not via +// the annotation resolver) and ensures both the per-field JSON schema helper +// and full OpenAPI generation never panic and always emit a valid document. +// Building InputType directly reaches shapes the resolver may not produce, +// stressing inputTypeJSONSchema and buildInputSchema in isolation. +func FuzzInputTypeJSONSchema(f *testing.F) { + f.Add([]byte{0, 3}) // primitive string + f.Add([]byte{1}) // any + f.Add([]byte{2, 0, 3}) // array of string + f.Add([]byte{3, 2, 0, 3, 0, 1}) // union of string and float + f.Add([]byte{3, 2, 0, 3, 0, 1, 0xff}) // nullable union of string and float + + f.Fuzz(func(t *testing.T, data []byte) { + it, _ := decodeInputType(data, 0, 0) + + // Field with both a compat FieldType and the recursive InputType set, + // mirroring how the parser populates InputField. 
+ field := InputField{ + Name: "value", + Order: 0, + FieldType: FieldType{Primitive: TypeAny, Repetition: Required}, + InputType: &it, + } + + inputs := NewOrderedMap[string, InputField]() + inputs.Set("value", field) + out, err := GenerateOpenAPISchema(&PredictorInfo{ + Inputs: inputs, + Output: SchemaPrim(TypeString), + Mode: ModePredict, + }) + if err != nil { + return + } + assertValidOpenAPI(t, out) + }) +} + +// assertValidOpenAPI loads and validates a generated OpenAPI document with the +// same kin-openapi validator used by writeAndValidateSchema at build time. +// A document that fails validation is a generation bug. +func assertValidOpenAPI(t *testing.T, schemaJSON []byte) { + t.Helper() + loader := openapi3.NewLoader() + loader.IsExternalRefsAllowed = true + doc, err := loader.LoadFromData(schemaJSON) + if err != nil { + t.Fatalf("generated schema failed to load: %v\n%s", err, string(schemaJSON)) + } + if err := doc.Validate(context.Background()); err != nil { + t.Fatalf("generated schema is invalid: %v\n%s", err, string(schemaJSON)) + } +} + +// decodeInputType builds an InputType tree from bytes, mirroring the encoding +// strategy of decodeSchemaType. A trailing byte after every node (when present) +// toggles nullability so the fuzzer reaches nullable and non-nullable shapes. 
+func decodeInputType(data []byte, offset int, depth int) (InputType, int) { + if depth > maxFuzzDepth || offset >= len(data) { + return InputPrimitive(TypeString), offset + } + + kind := InputTypeKind(data[offset] % 4) + offset++ + + switch kind { + case InputKindPrimitive: + prim := PrimitiveType(0) + if offset < len(data) { + prim = PrimitiveType(data[offset] % 9) + offset++ + } + it := InputPrimitive(prim) + if offset < len(data) { + if data[offset]%2 == 1 { + it.Nullable = true + } + offset++ + } + return it, offset + + case InputKindAny: + it := InputAnyType() + if offset < len(data) { + if data[offset]%2 == 1 { + it.Nullable = true + } + offset++ + } + return it, offset + + case InputKindArray: + elem, newOffset := decodeInputType(data, offset, depth+1) + it := InputArrayOf(elem) + if newOffset < len(data) { + if data[newOffset]%2 == 1 { + it.Nullable = true + } + newOffset++ + } + return it, newOffset + + case InputKindUnion: + numVariants := 0 + if offset < len(data) { + numVariants = int(data[offset]) % 4 // cap at 3 variants + offset++ + } + variants := make([]InputType, 0, numVariants) + for i := 0; i < numVariants && offset < len(data); i++ { + v, newOffset := decodeInputType(data, offset, depth+1) + variants = append(variants, v) + offset = newOffset + } + it := InputUnionOf(variants...) + if offset < len(data) { + if data[offset]%2 == 1 { + it.Nullable = true + } + offset++ + } + return it, offset + + default: + return InputPrimitive(TypeString), offset + } +} From 9e2a8e1485b61e8a49ead4e254d6a67e544ded00 Mon Sep 17 00:00:00 2001 From: Mark Phelps Date: Fri, 5 Jun 2026 09:33:21 -0400 Subject: [PATCH 2/3] docs: correct union input type support in python.md The union-inputs feature (#3048) added input union support, but the "Type limitations" section still claimed only Optional[T] was supported, contradicting the new Union section. 
Scope the limitation correctly: output unions remain unsupported, JSON-native input unions are supported, and input unions of Path/File/Secret/custom-coder/BaseModel members fail at build. Also add the missing Union table-of-contents entry and regenerate llms.txt. --- docs/llms.txt | 4 +++- docs/python.md | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/llms.txt b/docs/llms.txt index 92c90a36ab..2044b2f6d7 100644 --- a/docs/llms.txt +++ b/docs/llms.txt @@ -2070,6 +2070,7 @@ This document defines the API of the `cog` Python module, which is used to defin - [`cog.Secret`](#cogsecret) - [Wrapper types](#wrapper-types) - [`Optional`](#optional) + - [`Union`](#union) - [`list`](#list) - [`dict`](#dict) - [`cog.Opaque`](#cogopaque) @@ -2837,7 +2838,8 @@ Fields in a `BaseModel` output support these types: The following type patterns are **not** supported: - **Nested generics**: `list[list[str]]`, `list[Optional[str]]`, `Optional[list[str]]` are not supported. -- **Union types beyond Optional**: `str | int`, `Union[str, int, None]` — only `Optional[T]` (i.e. `T | None`) is supported. +- **Output union types beyond Optional**: union _return_ types and `BaseModel` union fields are not supported. Input unions of JSON-native types (`str | int`, `str | float | None`, etc.) _are_ supported — see [`Union`](#union). +- **Input unions of non-JSON-native types**: input unions involving `Path`, `File`, `Secret`, custom coders, or `BaseModel` (e.g. `Path | str`) are not supported and fail at build time. - **`Optional` as a top-level return type**: `-> Optional[str]` is not allowed. Use a `BaseModel` with optional fields instead. - **Nested `BaseModel` fields**: A `BaseModel` field typed as another `BaseModel` is not supported in Cog's type system for schema generation. - **Tuple, Set, or other collection types**: Only `list` and `dict` are supported as collection types. 
diff --git a/docs/python.md b/docs/python.md index 2603101ff9..fe9307aa3b 100644 --- a/docs/python.md +++ b/docs/python.md @@ -38,6 +38,7 @@ This document defines the API of the `cog` Python module, which is used to defin - [`cog.Secret`](#cogsecret) - [Wrapper types](#wrapper-types) - [`Optional`](#optional) + - [`Union`](#union) - [`list`](#list) - [`dict`](#dict) - [`cog.Opaque`](#cogopaque) @@ -805,7 +806,8 @@ Fields in a `BaseModel` output support these types: The following type patterns are **not** supported: - **Nested generics**: `list[list[str]]`, `list[Optional[str]]`, `Optional[list[str]]` are not supported. -- **Union types beyond Optional**: `str | int`, `Union[str, int, None]` — only `Optional[T]` (i.e. `T | None`) is supported. +- **Output union types beyond Optional**: union _return_ types and `BaseModel` union fields are not supported. Input unions of JSON-native types (`str | int`, `str | float | None`, etc.) _are_ supported — see [`Union`](#union). +- **Input unions of non-JSON-native types**: input unions involving `Path`, `File`, `Secret`, custom coders, or `BaseModel` (e.g. `Path | str`) are not supported and fail at build time. - **`Optional` as a top-level return type**: `-> Optional[str]` is not allowed. Use a `BaseModel` with optional fields instead. - **Nested `BaseModel` fields**: A `BaseModel` field typed as another `BaseModel` is not supported in Cog's type system for schema generation. - **Tuple, Set, or other collection types**: Only `list` and `dict` are supported as collection types. From 262f91a47aba7d2928e48f2bb1f85a9ff79728c0 Mon Sep 17 00:00:00 2001 From: Mark Phelps Date: Fri, 5 Jun 2026 09:35:42 -0400 Subject: [PATCH 3/3] test: use testify require in assertValidOpenAPI Replace raw t.Fatalf with require.NoError per the project testing conventions (AGENTS.md). 
--- pkg/schema/input_type_fuzz_test.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pkg/schema/input_type_fuzz_test.go b/pkg/schema/input_type_fuzz_test.go index cf59da0b88..d8ae1ab064 100644 --- a/pkg/schema/input_type_fuzz_test.go +++ b/pkg/schema/input_type_fuzz_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/getkin/kin-openapi/openapi3" + "github.com/stretchr/testify/require" ) // FuzzResolveInputType builds arbitrary TypeAnnotation trees from fuzz input @@ -163,12 +164,9 @@ func assertValidOpenAPI(t *testing.T, schemaJSON []byte) { loader := openapi3.NewLoader() loader.IsExternalRefsAllowed = true doc, err := loader.LoadFromData(schemaJSON) - if err != nil { - t.Fatalf("generated schema failed to load: %v\n%s", err, string(schemaJSON)) - } - if err := doc.Validate(context.Background()); err != nil { - t.Fatalf("generated schema is invalid: %v\n%s", err, string(schemaJSON)) - } + require.NoError(t, err, "generated schema failed to load\n%s", string(schemaJSON)) + err = doc.Validate(context.Background()) + require.NoError(t, err, "generated schema is invalid\n%s", string(schemaJSON)) } // decodeInputType builds an InputType tree from bytes, mirroring the encoding