From 1723ba47800db3faff424a21c994416aaf996d85 Mon Sep 17 00:00:00 2001 From: yangjie 00013916 Date: Mon, 8 Jun 2026 15:32:32 +0800 Subject: [PATCH] fix(extraction): strip generic type arguments from supertype names so `extends Base<T>` resolves MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem Class inheritance edges were silently dropped whenever a superclass or implemented interface carried generic type arguments, e.g. `class A extends Base<T>` or `class B implements Iface<T>`. Root cause: in the shared tree-sitter supertype extraction the extends/implements/C++-base paths used getNodeText() on the supertype node. For a generic supertype the node is `generic_type` (C++ `template_type`), so the captured reference name included the type arguments ("Base<T>"). During resolution this never matched the class node named "Base", and the reference was discarded — it was not even kept in unresolved_refs, so the loss was silent. This affects every language that goes through the shared supertype extraction path: Java, TypeScript, and C++ templates. (C#, Kotlin, Swift, Rust and Go already narrow to the bare type identifier or handle `generic_type` explicitly, so they were unaffected.) The dropped edges land disproportionately on `*ServiceImpl extends BaseXxx<T>` interface+DI classes that matter most for call-graph reachability. ## Fix Add a shared `stripTypeArguments` helper and use it when recording the extends/implements reference name, dropping the angle-bracket suffix (including nested generics) while preserving any qualified prefix so resolution can still disambiguate same-named types across packages. ## Test Added regression tests asserting the extends/implements reference names are stripped for generic and nested-generic Java supertypes and C++ template base classes. Verified end-to-end that a generic Java project now produces the extends/implements edges that were previously dropped. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 4 +++ __tests__/extraction.test.ts | 41 +++++++++++++++++++++++++++ src/extraction/tree-sitter-helpers.ts | 21 ++++++++++++++ src/extraction/tree-sitter.ts | 14 ++++++--- 4 files changed, 76 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0fcf6519..0d79c7486 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Fixes + +- Class inheritance edges are no longer silently dropped when the superclass or implemented interface carries generic type arguments — for example `class A extends Base<T>` or `class B implements Iface<T>` (including nested generics and qualified names). This affected Java, TypeScript, and C++ templates, and most often broke exactly the interface-to-implementation service classes that matter most for navigation, so `codegraph_callers`, `codegraph_impact`, and `codegraph_trace` now follow those inheritance links instead of dead-ending. + ## [0.9.8] - 2026-06-01 diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index b497af6a9..4c4ae3209 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -940,6 +940,47 @@ public class Splitter { }); }); +describe('Generic supertype extraction', () => { + // Regression: a generic superclass/interface parses as a `generic_type` + // (C++ `template_type`) node whose text carries the angle-bracket suffix. + // Without stripping the type arguments the reference name (`Base<T>`) never + // matched the `Base` class node during resolution, so the extends/implements + // edge was silently dropped — it wasn't even kept in unresolved_refs. 
+ it('strips generic type arguments from Java extends/implements supertypes', () => { + const code = ` +class Base<T> {} +interface Iface<T> {} +class A extends Base<String> {} +class B implements Iface<String> {} +class C extends Base<List<String>> {} +`; + const result = extractFromSource('Generics.java', code); + + const extendsRefs = result.unresolvedReferences.filter((r) => r.referenceKind === 'extends'); + const implementsRefs = result.unresolvedReferences.filter((r) => r.referenceKind === 'implements'); + + // `extends Base<String>` and the nested `extends Base<List<String>>` both → "Base" + expect(extendsRefs.some((r) => r.referenceName === 'Base')).toBe(true); + // `implements Iface<String>` → "Iface" + expect(implementsRefs.some((r) => r.referenceName === 'Iface')).toBe(true); + // The angle-bracket suffix is fully removed, including for nested generics. + expect(extendsRefs.every((r) => !r.referenceName.includes('<'))).toBe(true); + expect(implementsRefs.every((r) => !r.referenceName.includes('<'))).toBe(true); + }); + + it('strips template arguments from C++ base classes', () => { + const code = ` +template <typename T> class Base {}; +class Derived : public Base<int> {}; +`; + const result = extractFromSource('derived.cpp', code); + + const extendsRefs = result.unresolvedReferences.filter((r) => r.referenceKind === 'extends'); + expect(extendsRefs.some((r) => r.referenceName === 'Base')).toBe(true); + expect(extendsRefs.every((r) => !r.referenceName.includes('<'))).toBe(true); + }); +}); + describe('C# Extraction', () => { it('should extract class declarations', () => { const code = ` diff --git a/src/extraction/tree-sitter-helpers.ts b/src/extraction/tree-sitter-helpers.ts index 0f2dd1bfd..05a4a1a56 100644 --- a/src/extraction/tree-sitter-helpers.ts +++ b/src/extraction/tree-sitter-helpers.ts @@ -43,6 +43,27 @@ export function getChildByField(node: SyntaxNode, fieldName: string): SyntaxNode return node.childForFieldName(fieldName); } +/** + * Strip generic/template type arguments from a type reference name. 
+ * + * A supertype written as `Base<T>` (or nested `Base<List<T>>`) is captured by + * tree-sitter as a `generic_type`/`template_type` node whose text includes the + * angle-bracket arguments. Class nodes are indexed under their argument-free name + * (`Base`), so without stripping, an `extends Base<T>` reference resolves to + * nothing and the inheritance edge is silently dropped. + * + * Every balanced `<...>` group is removed wherever it occurs, not just from the + * first `<` to the end of the text: a member type written after the arguments + * (`Outer<T>.Inner`, `ns::Tpl<int>::Nested`) keeps its own name + * (`Outer.Inner`, `ns::Tpl::Nested`) instead of collapsing to the enclosing + * type. Qualified prefixes (`com.foo.Base<T>`, `ns::Base<T>`) are preserved — + * resolution uses them to disambiguate same-named types across + * packages/namespaces. Names that contain no `<`, or that start with `<` + * (synthetic markers such as anonymous-class names), are returned trimmed but + * otherwise untouched. An unclosed `<` drops everything after it. + * + * @param name - Raw text of the supertype node, possibly carrying type arguments. + * @returns The name with every type-argument list removed and outer whitespace trimmed. + */ +export function stripTypeArguments(name: string): string { + // No type arguments, or a synthetic name that itself starts with `<`: keep as-is. + if (name.indexOf('<') <= 0) return name.trim(); + let bare = ''; + let angleDepth = 0; + let parenDepth = 0; + for (const ch of name) { + if (angleDepth > 0 && (ch === '(' || ch === ')')) { + // Track parentheses inside the arguments so the `>` in `Tpl<(N > 8)>` + // is not mistaken for the closing delimiter. + parenDepth = Math.max(0, parenDepth + (ch === '(' ? 1 : -1)); + } else if (parenDepth > 0) { + continue; + } else if (ch === '<') { + angleDepth++; + } else if (ch === '>' && angleDepth > 0) { + angleDepth--; + } else if (angleDepth === 0) { + bare += ch; + } + } + return bare.trim(); +} + /** * Get the docstring/comment preceding a node */ diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts index f576839fa..83129195b 100644 --- a/src/extraction/tree-sitter.ts +++ b/src/extraction/tree-sitter.ts @@ -16,7 +16,7 @@ import { UnresolvedReference, } from '../types'; import { getParser, detectLanguage, isLanguageSupported, isFileLevelOnlyLanguage } from './grammars'; -import { generateNodeId, getNodeText, getChildByField, getPrecedingDocstring } from './tree-sitter-helpers'; +import { generateNodeId, getNodeText, getChildByField, getPrecedingDocstring, stripTypeArguments } from './tree-sitter-helpers'; import type { LanguageExtractor, ExtractorContext } from './tree-sitter-types'; import { EXTRACTORS } from './languages'; import { LiquidExtractor } from './liquid-extractor'; @@ -2127,7 +2127,9 @@ export class TreeSitterExtractor { const targets = typeList ? 
typeList.namedChildren : [child.namedChild(0)]; for (const target of targets) { if (target) { - const name = getNodeText(target, this.source); + // Generic supertypes parse as `generic_type` whose text is `Base<T>`; + // strip the type arguments so the ref resolves to the `Base` node. + const name = stripTypeArguments(getNodeText(target, this.source)); this.unresolvedReferences.push({ fromNodeId: classId, referenceName: name, @@ -2149,9 +2151,11 @@ export class TreeSitterExtractor { t.type === 'qualified_identifier' || t.type === 'template_type' ) { + // `template_type` text is `Base<T>` — strip the template arguments + // so the base resolves to the `Base` node. this.unresolvedReferences.push({ fromNodeId: classId, - referenceName: getNodeText(t, this.source), + referenceName: stripTypeArguments(getNodeText(t, this.source)), referenceKind: 'extends', line: t.startPosition.row + 1, column: t.startPosition.column, @@ -2172,7 +2176,9 @@ export class TreeSitterExtractor { const targets = typeList ? typeList.namedChildren : child.namedChildren; for (const iface of targets) { if (iface) { - const name = getNodeText(iface, this.source); + // Generic interfaces parse as `generic_type` whose text is `Iface<T>`; + // strip the type arguments so the ref resolves to the `Iface` node. + const name = stripTypeArguments(getNodeText(iface, this.source)); this.unresolvedReferences.push({ fromNodeId: classId, referenceName: name,