diff --git a/.env.example b/.env.example index f2b4de8b..a433d1be 100644 --- a/.env.example +++ b/.env.example @@ -6,7 +6,8 @@ GOOGLE_API_KEY='your-google-api-key' WORKSPACE_PATH='./workspace' # default local workspace path for tools that write to disk, can be overridden by tools with custom paths or when using AgentFS -BETTER_AUTH_URL='https://localhost:3000' # Base App URL +BETTER_AUTH_URL='http://localhost:3000' # Base App URL +NEXT_PUBLIC_BETTER_AUTH_URL='http://localhost:3000' # Browser-side Better Auth base URL BETTER_AUTH_SECRET='KQh7DvS4PtsNqJ1PZSoYheGOo1k13SUZqUBwNazc28U=' # openssl rand -base64 32 <--- run this to make ur own DEV_AUTH_ENABLED=true # Set to false to disable authentication in development (not recommended, but can be useful for quick testing) # Next.js + Mastra Client SDK @@ -29,20 +30,17 @@ DISCORD_CLIENT_ID='your_discord_client_id_here' DISCORD_SECRET_KEY='your_discord_secret_key_here' DISCORD_WEBHOOK_URL='your_discord_webhook_url_here' GOOGLE_CLIENT_ID="******************-**********************.apps.googleusercontent.com" +NEXT_PUBLIC_GOOGLE_CLIENT_ID="******************-**********************.apps.googleusercontent.com" GOOGLE_CLIENT_SECRET="fake_google_client_secret_for_local_dev" -GOOGLE_CLIENT_CALLBACK_URL="https://localhost:3000/api/callback" +GOOGLE_CLIENT_CALLBACK_URL="http://localhost:3000/api/auth/callback/google" #Authorized redirect URIs -#https://localhost:3000/api/callback +#http://localhost:3000/api/auth/callback/google -#https://localhost:3000/callback +#http://127.0.0.1:3000/api/auth/callback/google -#https://127.0.0.1:3000/api/callback - -#http://127.0.0.1:3000/api/callback - -#https://127.0.0.1:3000/callback +#https://your-domain.com/api/auth/callback/google # Opencode Zen API Key diff --git a/.gitignore b/.gitignore index 971ab63a..f143f203 100644 --- a/.gitignore +++ b/.gitignore @@ -145,7 +145,7 @@ gha-creds-*.json .zencoder/rules/*.md # Stakpak local files -.stakpak +.stakpak/session* stakpak.backup stakpak.exe 
opnapi.json @@ -208,3 +208,19 @@ src/mastra/public/workspace/workspace/iran-war-report.md .env.local.bak thoughts/ledgers/CONTINUITY_ses_303c.md thoughts/ledgers/CONTINUITY_ses_303d.md +.mastra-project.json +start-server.js +start-dev.js +.playwright-mcp/page-2026-04-15T07-03-10-932Z.yml +.playwright-mcp/page-2026-04-15T07-04-06-556Z.yml +.playwright-mcp/page-2026-04-15T08-44-34-033Z.yml +.playwright-mcp/page-2026-04-15T07-36-00-846Z.yml +.playwright-mcp/page-2026-04-15T07-34-28-100Z.yml +.gitignore +.playwright-mcp/page-2026-04-15T07-29-52-207Z.yml +.playwright-mcp/page-2026-04-15T07-17-59-962Z.yml +.playwright-mcp/page-2026-04-15T07-31-15-475Z.yml +.playwright-mcp/page-2026-04-15T07-26-05-653Z.yml +.playwright-mcp/page-2026-04-15T07-24-17-682Z.yml +.playwright-mcp/page-2026-04-15T07-20-32-715Z.yml +.playwright-mcp/page-2026-04-15T07-18-30-461Z.yml diff --git a/README.md b/README.md index ac06e27d..da5ad3bd 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ ## Development ![Networks Custom Tool v1.0.0](networksCustomToolv1.png) +![Home Page v1.0.0](page-2026-04-15T07-04-23-082Z.png) diff --git a/app/chat/AGENTS.md b/app/chat/AGENTS.md index d8c2dffa..f418304b 100644 --- a/app/chat/AGENTS.md +++ b/app/chat/AGENTS.md @@ -2,6 +2,20 @@ # App/Chat +## Recent Update (2026-04-15) + +- Settings pages are now modular route groups instead of two monolithic screens: + - `/chat/user` overview + focused routes for profile, security, sessions, API keys, and danger zone + - `/chat/admin` overview + focused routes for runtime and user operations +- Use `app/chat/components/chat-settings-shell.tsx` when a chat route needs the shared `ChatProvider` + `ChatPageShell` + `MainSidebar` composition plus an in-section settings nav. +- Use the same shared shell composition for non-settings dashboard surfaces that need the persistent chat sidebar; the current wrapped set includes datasets, evaluation, observability, tools, logs, harness, MCP/A2A, workflows, and workflow detail pages. 
+- `UserSettingsPanel` and `AdminSettingsPanel` now support section-based rendering so new routes can reuse the same Better Auth mutations without duplicating form logic. +- `useWorkspaces()` in `lib/hooks/use-mastra-query.ts` now returns normalized `WorkspaceItem[]`; new chat UI code should consume that array directly instead of re-decoding `{ workspaces: [...] }` response shapes in components. +- Prefer shared tooltip and scroll affordances on high-density chat surfaces: + - add tooltip descriptions for navigation items and overview cards when a route’s purpose is not obvious + - use `ScrollArea` for long sidebars, thread lists, or horizontally dense settings navs rather than letting layout overflow + - keep shell spacing consistent through `ChatPageShell` instead of per-page padding drift + ## Overview The `/chat` route provides a rich AI chat interface built with **AI Elements** (52 components) integrated with **26+ Mastra agents**. Uses `@ai-sdk/react` with `useChat` and `DefaultChatTransport` to stream responses from Mastra's `/chat` route. diff --git a/app/chat/admin/_components/admin-management-panel.tsx b/app/chat/admin/_components/admin-management-panel.tsx index e12a81aa..cee0cbf9 100644 --- a/app/chat/admin/_components/admin-management-panel.tsx +++ b/app/chat/admin/_components/admin-management-panel.tsx @@ -81,6 +81,7 @@ type PasswordFormState = { } type AdminRole = 'user' | 'admin' +export type AdminSettingsPanelSection = 'all' | 'runtime' | 'users' const pageSize = 20 @@ -183,7 +184,11 @@ function mapUserToEditForm(user: AdminUser | null | undefined): EditUserFormStat /** * Admin user-management panel powered entirely by Better Auth hooks. 
*/ -export function AdminSettingsPanel() { +export function AdminSettingsPanel({ + section = 'all', +}: { + section?: AdminSettingsPanelSection +}) { const { data: authSession } = useAuthQuery() const { data: modelProvidersData } = useAgentModelProviders() const [search, setSearch] = React.useState('') @@ -366,6 +371,8 @@ export function AdminSettingsPanel() { const refreshDisabled = usersQuery.isFetching const detailBusy = selectedUserQuery.isLoading || selectedSessionsQuery.isLoading + const showRuntime = section === 'all' || section === 'runtime' + const showUsers = section === 'all' || section === 'users' return ( @@ -549,7 +556,8 @@ export function AdminSettingsPanel() { - + {showUsers ? ( + Find users @@ -626,9 +634,11 @@ export function AdminSettingsPanel() { - + + ) : null} - + {showRuntime ? ( + Runtime context @@ -672,9 +682,11 @@ export function AdminSettingsPanel() { ))} - + + ) : null} -
+ {showUsers ? ( +
Users @@ -1041,7 +1053,8 @@ export function AdminSettingsPanel() { -
+
+ ) : null}
) diff --git a/app/chat/admin/layout.tsx b/app/chat/admin/layout.tsx new file mode 100644 index 00000000..6c3ffe8e --- /dev/null +++ b/app/chat/admin/layout.tsx @@ -0,0 +1,39 @@ +'use client' + +import type { ReactNode } from 'react' + +import { ChatSettingsShell } from '../components/chat-settings-shell' + +const adminSettingsSections = [ + { + href: '/chat/admin', + title: 'Overview', + description: 'Start from the admin summary and branch into runtime or user operations.', + }, + { + href: '/chat/admin/runtime', + title: 'Runtime', + description: 'Inspect active auth/runtime context and connected model providers.', + }, + { + href: '/chat/admin/users', + title: 'Users', + description: 'Search users, change roles, moderate access, impersonate, and revoke sessions.', + }, +] as const + +export default function AdminSettingsLayout({ + children, +}: { + children: ReactNode +}) { + return ( + + {children} + + ) +} diff --git a/app/chat/admin/page.tsx b/app/chat/admin/page.tsx index 71e99c0e..4ef60539 100644 --- a/app/chat/admin/page.tsx +++ b/app/chat/admin/page.tsx @@ -1,18 +1,54 @@ -import { ChatPageShell } from '../components/chat-page-shell' -import { MainSidebar } from '../components/main-sidebar' -import { AdminSettingsPanel } from './_components/admin-management-panel' +import Link from 'next/link' + +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/ui/card' +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from '@/ui/tooltip' /** - * Admin settings page for chat operators. + * Admin settings overview for chat operators. 
*/ export default function AdminPage() { + const sections = [ + { + href: '/chat/admin/runtime', + title: 'Runtime context', + description: 'Inspect the live Better Auth session and Mastra provider inventory.', + }, + { + href: '/chat/admin/users', + title: 'Users', + description: 'Search users, edit records, manage roles, moderate bans, and revoke sessions.', + }, + ] as const + return ( - } - > - - + +
+ {sections.map((section) => ( + + + + + + {section.title} + {section.description} + + + Open the focused {section.title.toLowerCase()} admin route. + + + + + + {section.description} + + + ))} +
+
) } diff --git a/app/chat/admin/runtime/page.tsx b/app/chat/admin/runtime/page.tsx new file mode 100644 index 00000000..d93d74da --- /dev/null +++ b/app/chat/admin/runtime/page.tsx @@ -0,0 +1,5 @@ +import { AdminSettingsPanel } from '../_components/admin-management-panel' + +export default function AdminRuntimeSettingsPage() { + return +} diff --git a/app/chat/admin/users/page.tsx b/app/chat/admin/users/page.tsx new file mode 100644 index 00000000..838b6992 --- /dev/null +++ b/app/chat/admin/users/page.tsx @@ -0,0 +1,5 @@ +import { AdminSettingsPanel } from '../_components/admin-management-panel' + +export default function AdminUsersSettingsPage() { + return +} diff --git a/app/chat/components/chat-layout.tsx b/app/chat/components/chat-layout.tsx index 86f2c23d..70e6378e 100644 --- a/app/chat/components/chat-layout.tsx +++ b/app/chat/components/chat-layout.tsx @@ -6,6 +6,7 @@ import { ChatInput } from './chat-input' import { ChatSidebar } from './chat-sidebar' import { MainSidebar } from './main-sidebar' import { SidebarProvider, SidebarInset } from '@/ui/sidebar' +import { ResizableHandle, ResizablePanel, ResizablePanelGroup } from '@/ui/resizable' import { useChatContext } from '../providers/chat-context-hooks' import { cn } from '@/lib/utils' @@ -23,18 +24,30 @@ export function ChatLayout() { -
-
- - -
- - {!isFocusMode && ( -
- + + +
+ +
- )} -
+ + + {!isFocusMode ? ( + <> + + +
+ +
+
+ + ) : null} + diff --git a/app/chat/components/chat-messages.tsx b/app/chat/components/chat-messages.tsx index 3a1128ce..36f0d0cb 100644 --- a/app/chat/components/chat-messages.tsx +++ b/app/chat/components/chat-messages.tsx @@ -72,6 +72,7 @@ import { AgentConfirmation } from './agent-confirmation' import { AgentWorkflow, type WorkflowNode, type WorkflowEdge } from './agent-workflow' import { extractPlanFromText, + extractThoughtSummaryFromProviderMetadata, parseReasoningToSteps, tokenizeInlineCitations, } from './chat.utils' @@ -743,28 +744,17 @@ function extractThoughtSummaryFromParts( } for (const part of parts) { - const pm = (part as { providerMetadata?: ProviderMetadata | undefined }) - .providerMetadata - - const googleMeta = (pm as Record).google - if ( - googleMeta === undefined || - googleMeta === null || - typeof googleMeta !== 'object' - ) { - continue - } - - const candidates = [ - (googleMeta as Record).thoughtSummary, - (googleMeta as Record).thoughts, - (googleMeta as Record).thinkingSummary, - ] - - for (const c of candidates) { - if (typeof c === 'string' && c.trim().length > 0) { - return c - } + const providerMetadata = + 'providerMetadata' in part + ? (part.providerMetadata as ProviderMetadata | undefined) + : 'callProviderMetadata' in part + ? 
(part.callProviderMetadata as ProviderMetadata | undefined) + : undefined + + const summary = + extractThoughtSummaryFromProviderMetadata(providerMetadata) + if (summary.length > 0) { + return summary } } @@ -2158,7 +2148,15 @@ export function ChatMessages(_props?: Partial) { const timeoutId = window.setTimeout(() => { const validateMessages = async () => { - if (messages.length === 0) { + const hasIncompleteMessage = messages.some((message) => { + if (!Array.isArray(message.parts)) { + return true + } + + return message.parts.length === 0 + }) + + if (messages.length === 0 || isLoading || hasIncompleteMessage) { setValidationError(null) return } @@ -2186,7 +2184,7 @@ export function ChatMessages(_props?: Partial) { isMounted = false window.clearTimeout(timeoutId) } - }, [messages]) + }, [isLoading, messages]) const showReasoning = agentConfig?.features.reasoning ?? false const showChainOfThought = agentConfig?.features.chainOfThought ?? false diff --git a/app/chat/components/chat-page-shell.tsx b/app/chat/components/chat-page-shell.tsx index 3a58b974..c1a3c3fb 100644 --- a/app/chat/components/chat-page-shell.tsx +++ b/app/chat/components/chat-page-shell.tsx @@ -13,6 +13,7 @@ interface ChatPageShellProps { eyebrow?: string contentClassName?: string sidebar?: ReactNode + hideHeader?: boolean } /** @@ -26,37 +27,40 @@ export function ChatPageShell({ eyebrow = 'AgentStack command center', contentClassName, sidebar, + hideHeader = false, }: ChatPageShellProps) { return ( {sidebar ?? null}
-
-
-
-

- {eyebrow} -

-

- {title} -

-

- {description} -

-
- - {actions ? ( -
- {actions} + {hideHeader ? null : ( +
+
+
+

+ {eyebrow} +

+

+ {title} +

+

+ {description} +

- ) : null} -
-
+ + {actions ? ( +
+ {actions} +
+ ) : null} +
+
+ )}
diff --git a/app/chat/components/chat-settings-shell.tsx b/app/chat/components/chat-settings-shell.tsx new file mode 100644 index 00000000..8fce08b6 --- /dev/null +++ b/app/chat/components/chat-settings-shell.tsx @@ -0,0 +1,96 @@ +'use client' + +import Link from 'next/link' +import { usePathname } from 'next/navigation' +import type { ReactNode } from 'react' + +import { MainSidebar } from './main-sidebar' +import { ChatPageShell } from './chat-page-shell' +import { ChatProvider } from '../providers/chat-context' +import { cn } from '@/lib/utils' +import { ScrollArea, ScrollBar } from '@/ui/scroll-area' +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from '@/ui/tooltip' + +type SettingsSection = { + href: string + title: string + description: string +} + +interface ChatSettingsShellProps { + title: string + description: string + sections: SettingsSection[] + children: ReactNode +} + +/** + * Shared shell for modular chat settings routes that need the main sidebar and + * a secondary in-section navigation. + */ +export function ChatSettingsShell({ + title, + description, + sections, + children, +}: ChatSettingsShellProps) { + const pathname = usePathname() + + return ( + + } + > + +
+ + + + + + {children} +
+
+
+
+ ) +} diff --git a/app/chat/components/chat-sidebar.tsx b/app/chat/components/chat-sidebar.tsx index 1a74cb19..5f629186 100644 --- a/app/chat/components/chat-sidebar.tsx +++ b/app/chat/components/chat-sidebar.tsx @@ -2,6 +2,7 @@ import { useChatContext } from '@/app/chat/providers/chat-context-hooks' import { + DEFAULT_VECTOR_STORE_NAME, useAgent, useAgentEnhanceInstructionsMutation, useAgents, @@ -10,6 +11,7 @@ import { useTools, useTraces, useVectorIndexes, + useVectors, useWorkflows, useProcessors, useScorers, @@ -66,6 +68,11 @@ type TabKey = | 'config' type TraceRecord = Record +type WorkspaceRecord = { + id?: string + name?: string + agentName?: string +} const TRACE_STATUS_COLORS: Record = { ok: 'bg-emerald-500/15 text-emerald-400 border-emerald-500/20', @@ -233,7 +240,7 @@ export function ChatSidebar() { const workflowsQuery = useWorkflows() const tracesQuery = useTraces({ pagination: { page: 1, perPage: 20 } }) const threadsQuery = useThreads({ resourceId }) - const vectorsQuery = useVectorIndexes() + const vectorStoresQuery = useVectors() const memoryStatusQuery = useMemoryStatus(agentConfig?.id ?? '') const agentDetailsQuery = useAgent(selectedAgent) @@ -253,10 +260,6 @@ export function ChatSidebar() { void tracesQuery.refetch() }, [tracesQuery]) - const onRefreshVectors = useCallback(() => { - void vectorsQuery.refetch() - }, [vectorsQuery]) - const onRefreshThreads = useCallback(() => { void threadsQuery.refetch() @@ -281,19 +284,28 @@ export function ChatSidebar() { const threads = threadsQuery.data ?? [] const loadingThreads = threadsQuery.isLoading - const vectors = vectorsQuery.data ?? 
[] - const loadingVectors = vectorsQuery.isLoading - const workspacesQuery = useWorkspaces() const workspaceSkillsQuery = useWorkspaceSkills(resourceId) const storedSkillsQuery = useStoredSkills() const processorsQuery = useProcessors() const scorersQuery = useScorers() - const workspaceLabels = useMemo( - () => normalizeCollection(workspacesQuery.data), + const workspaceItems = useMemo( + () => (workspacesQuery.data ?? []) as WorkspaceRecord[], [workspacesQuery.data] ) + const workspaceLabels = useMemo( + () => + workspaceItems + .map((workspace) => + safeString( + workspace.name ?? workspace.agentName ?? workspace.id, + '' + ) + ) + .filter((label) => label.length > 0), + [workspaceItems] + ) const workspaceSkillLabels = useMemo( () => normalizeCollection(workspaceSkillsQuery.data), [workspaceSkillsQuery.data] @@ -316,8 +328,47 @@ export function ChatSidebar() { [workflows] ) + const activeWorkspace = useMemo( + () => workspaceItems.find((workspace) => workspace.id === resourceId) ?? null, + [resourceId, workspaceItems] + ) + const preferredVectorStoreName = useMemo(() => { + const vectorStores = Array.isArray(vectorStoresQuery.data) + ? vectorStoresQuery.data + : [] + const vectorNames: string[] = vectorStores + .map((vectorStore) => { + if (typeof vectorStore === 'string') { + return vectorStore + } + + if (isRecord(vectorStore)) { + return safeString(vectorStore.name ?? vectorStore.id, '') + } + + return '' + }) + .filter((name: string) => name.length > 0) + + return ( + vectorNames.find((name) => name === DEFAULT_VECTOR_STORE_NAME) ?? + vectorNames[0] ?? + DEFAULT_VECTOR_STORE_NAME + ) + }, [vectorStoresQuery.data]) + const vectorsQuery = useVectorIndexes(preferredVectorStoreName) + const vectors = vectorsQuery.data ?? 
[] + const loadingVectors = vectorsQuery.isLoading || vectorStoresQuery.isLoading + + const onRefreshVectors = useCallback(() => { + void Promise.all([vectorStoresQuery.refetch(), vectorsQuery.refetch()]) + }, [vectorStoresQuery, vectorsQuery]) + const workspaceName = - workspaceLabels[0] ?? safeString(resourceId, 'Current workspace') + safeString( + activeWorkspace?.name ?? activeWorkspace?.agentName ?? activeWorkspace?.id, + safeString(resourceId, 'Current workspace') + ) const memoryStatusRes = memoryStatusQuery.data @@ -434,7 +485,7 @@ export function ChatSidebar() { { setActiveTab(v as TabKey); }} - className="flex flex-col flex-1 overflow-hidden" + className="flex flex-col flex-1 min-h-0 overflow-hidden" >
@@ -497,8 +548,8 @@ export function ChatSidebar() {
- -
+ +
{/* ──── Threads Tab ──── */}
-
- -

- Vector Indexes -

+
+ +

+ Vector Indexes +

- + + + + + + + + + Return to the main chat dashboard. + + + - - - - Pages - - - - {pageItems.map((item) => { - const Icon = item.icon + + +
+ + + Pages + + + + {pageItems.map((item) => { + const Icon = item.icon - return ( - - - -
- -
-
-
- {item.label} -
-
- -
-
- ) - })} -
-
-
+ return ( + + + + + +
+ +
+
+
+ {item.label} +
+
+ {item.description} +
+
+ +
+
+ + {item.description} + +
+
+ ) + })} + + + - - - Current Threads - - - {threadsResult.isLoading ? ( -
- -
- ) : threads.length === 0 ? ( -
- No threads for this agent yet -
- ) : ( - - {threads.map((thread) => { - const currentThreadId = getThreadId(thread) - if (!currentThreadId) { - return null - } + + + Current Threads + + + {threadsResult.isLoading ? ( +
+ +
+ ) : threads.length === 0 ? ( +
+ No threads for this agent yet. +
+ ) : ( + + + {threads.map((thread) => { + const currentThreadId = getThreadId(thread) + if (!currentThreadId) { + return null + } - const label = getThreadLabel(thread) - const meta = formatThreadMeta(thread) + const label = getThreadLabel(thread) + const meta = formatThreadMeta(thread) - return ( - - { - handleThreadClick(currentThreadId) - }} - className={cn( - 'w-full cursor-pointer items-start gap-3 rounded-xl px-3 py-2.5 text-left transition-all duration-200', - threadId === currentThreadId - ? 'bg-primary/10 text-primary font-medium' - : 'text-muted-foreground hover:bg-muted/50 hover:text-foreground' - )} - > -
- -
-
-
- {label} -
-
- {meta || currentThreadId} -
-
-
-
- ) - })} -
- )} -
-
- + return ( + + + + { + handleThreadClick( + currentThreadId + ) + }} + className={cn( + 'w-full cursor-pointer items-start gap-3 rounded-2xl px-3 py-3 text-left transition-all duration-200', + threadId === currentThreadId + ? 'bg-primary/10 text-primary font-medium' + : 'text-muted-foreground hover:bg-muted/50 hover:text-foreground' + )} + > +
+ +
+
+
+ {label} +
+
+ {meta || + currentThreadId} +
+
+
+
+ +
+ {label} +
+
+ {meta || + currentThreadId} +
+
+
+
+ ) + })} +
+ + )} +
+
+
+
+
- -
- {session?.user ? ( -
-
- Account -
-
- {session.user.name ?? 'Account'} -
-
- {session.user.email} -
-
- - - - User settings - - - {session.user.role === 'admin' ? ( + +
+ {session?.user ? ( +
+
+ Account +
+
+ {session.user.name ?? 'Account'} +
+
+ {session.user.email} +
+
+ - - Admin settings - + + + + + User settings + + + + + Open your personal settings routes. + + - ) : null} - + {session.user.role === 'admin' ? ( + + + + + + Admin settings + + + + + Open runtime and user administration controls. + + + + ) : null} + +
-
- ) : null} + ) : null} - - - { - router.push('/chat/agents') - }} - > - - Choose agent - - - - - - -
- - + + + + + { + router.push('/chat/agents') + }} + > + + Choose agent + + + + Open the agent directory and start a new thread. + + + + + + +
+ +
+
+ + Sign out of the current Better Auth session. + +
+
+
+
+ + + ) } diff --git a/app/chat/components/nested-agent-chat.tsx b/app/chat/components/nested-agent-chat.tsx index ec693641..baa0aa00 100644 --- a/app/chat/components/nested-agent-chat.tsx +++ b/app/chat/components/nested-agent-chat.tsx @@ -8,6 +8,7 @@ export function NestedAgentChat() { const { messages, sendMessage, status } = useChat({ transport: new DefaultChatTransport({ api: 'http://localhost:4111/chat/weatherAgent', + credentials: 'include', }), }) diff --git a/app/chat/config/agents.ts b/app/chat/config/agents.ts index d5c292ef..e835d97a 100644 --- a/app/chat/config/agents.ts +++ b/app/chat/config/agents.ts @@ -140,7 +140,7 @@ export const AGENT_CONFIGS: Record = { "knowledgeIndexingAgent": { id: 'knowledgeIndexingAgent', name: 'Knowledge Indexing Agent', - description: 'Index documents into PgVector for semantic search', + description: 'Index documents into the live vector store for semantic search', category: 'research', features: { ...defaultFeatures, diff --git a/app/chat/dataset/page.tsx b/app/chat/dataset/page.tsx index 87c45e22..b3b0c216 100644 --- a/app/chat/dataset/page.tsx +++ b/app/chat/dataset/page.tsx @@ -30,6 +30,9 @@ import { TooltipProvider, TooltipTrigger, } from '@/ui/tooltip' +import { ChatPageShell } from '../components/chat-page-shell' +import { MainSidebar } from '../components/main-sidebar' +import { ChatProvider } from '../providers/chat-context' import { Dialog, DialogContent, @@ -343,6 +346,14 @@ export default function DatasetPage() { ) return ( + + } + hideHeader + contentClassName="p-0" + >
{showHelpPanel ? ( @@ -1489,5 +1500,7 @@ export default function DatasetPage() {
+ + ) } diff --git a/app/chat/evaluation/page.tsx b/app/chat/evaluation/page.tsx index fb64d152..4184a05d 100644 --- a/app/chat/evaluation/page.tsx +++ b/app/chat/evaluation/page.tsx @@ -59,6 +59,9 @@ import { SparklesIcon, } from 'lucide-react' import { Panel } from '@/src/components/ai-elements/panel' +import { ChatPageShell } from '../components/chat-page-shell' +import { MainSidebar } from '../components/main-sidebar' +import { ChatProvider } from '../providers/chat-context' type AnyRecord = Record @@ -236,6 +239,14 @@ export default function EvaluationPage() { } return ( + + } + hideHeader + contentClassName="p-0" + >
{showHelpPanel ? ( @@ -810,5 +821,7 @@ export default function EvaluationPage() {
+ + ) -} \ No newline at end of file +} diff --git a/app/chat/harness/page.tsx b/app/chat/harness/page.tsx index 4d02e283..82d1f35e 100644 --- a/app/chat/harness/page.tsx +++ b/app/chat/harness/page.tsx @@ -126,6 +126,9 @@ import { StackTraceHeader, StackTraceFrames, } from '@/src/components/ai-elements/stack-trace' +import { ChatPageShell } from '../components/chat-page-shell' +import { MainSidebar } from '../components/main-sidebar' +import { ChatProvider } from '../providers/chat-context' import { Terminal, TerminalActions, @@ -1098,6 +1101,14 @@ export default function HarnessPage() { } return ( + + } + hideHeader + contentClassName="p-0" + >
@@ -1730,5 +1741,7 @@ export default function HarnessPage() {
+
+
) } diff --git a/app/chat/layout.tsx b/app/chat/layout.tsx index 9e40b348..e5f1fbf0 100644 --- a/app/chat/layout.tsx +++ b/app/chat/layout.tsx @@ -1,4 +1,4 @@ -import type { ReactNode } from 'react' +import { Suspense, type ReactNode } from 'react' import { headers } from 'next/headers' import { redirect } from 'next/navigation' @@ -11,7 +11,7 @@ import { auth } from '@/auth' * This prevents unauthenticated users from briefly rendering protected chat * surfaces before the client redirect runs. */ -export default async function ChatLayout({ +async function ChatSessionGate({ children, }: { children: ReactNode @@ -26,3 +26,11 @@ export default async function ChatLayout({ return children } + +export default function ChatLayout({ + children, +}: { + children: ReactNode +}) { + return {children} +} diff --git a/app/chat/logs/page.tsx b/app/chat/logs/page.tsx index ea7ff150..0a2a1888 100644 --- a/app/chat/logs/page.tsx +++ b/app/chat/logs/page.tsx @@ -62,6 +62,9 @@ import { PanelRightCloseIcon, } from 'lucide-react' import { cn } from '@/lib/utils' +import { ChatPageShell } from '../components/chat-page-shell' +import { MainSidebar } from '../components/main-sidebar' +import { ChatProvider } from '../providers/chat-context' type LogRecord = Record @@ -243,6 +246,14 @@ export default function LogsPage() { }, [rawLogs]) return ( + + } + hideHeader + contentClassName="p-0" + >
{/* Header */} @@ -630,5 +641,7 @@ export default function LogsPage() {
+ + ) } diff --git a/app/chat/mcp-a2a/page.tsx b/app/chat/mcp-a2a/page.tsx index 05c71445..917faf52 100644 --- a/app/chat/mcp-a2a/page.tsx +++ b/app/chat/mcp-a2a/page.tsx @@ -27,6 +27,9 @@ import { TooltipTrigger, } from '@/ui/tooltip' import { Panel } from '@/src/components/ai-elements/panel' +import { ChatPageShell } from '../components/chat-page-shell' +import { MainSidebar } from '../components/main-sidebar' +import { ChatProvider } from '../providers/chat-context' import { BotIcon, CircleHelpIcon, @@ -105,6 +108,14 @@ export default function McpA2APage() { agents.find((agent) => agent.id === activeAgentId) ?? agents[0] return ( + + } + hideHeader + contentClassName="p-0" + >
@@ -384,5 +395,7 @@ export default function McpA2APage() {
+ + ) } diff --git a/app/chat/observability/page.tsx b/app/chat/observability/page.tsx index 7e482ffe..1c5e372d 100644 --- a/app/chat/observability/page.tsx +++ b/app/chat/observability/page.tsx @@ -39,6 +39,9 @@ import { TooltipTrigger, } from '@/ui/tooltip' import { cn } from '@/lib/utils' +import { ChatPageShell } from '../components/chat-page-shell' +import { MainSidebar } from '../components/main-sidebar' +import { ChatProvider } from '../providers/chat-context' type SpanRecord = Record & { id?: string @@ -410,6 +413,14 @@ export default function ObservabilityPage() { const visibleSpans = spans return ( + + } + hideHeader + contentClassName="p-0" + >
@@ -670,5 +681,7 @@ export default function ObservabilityPage() {
+
+
) -} \ No newline at end of file +} diff --git a/app/chat/providers/chat-context.tsx b/app/chat/providers/chat-context.tsx index 6aa3cebe..6997ac1f 100644 --- a/app/chat/providers/chat-context.tsx +++ b/app/chat/providers/chat-context.tsx @@ -68,6 +68,7 @@ import { import { useCallback, useEffect, useMemo, useRef, useState, type ReactNode } from 'react' import { useAuthQuery } from '@/lib/hooks/use-auth-query' import { useAgent, useAgentModelProviders } from '@/lib/hooks/use-mastra-query' +import { extractThoughtSummaryFromProviderMetadata } from '../components/chat.utils' import { ChatContext } from './chat-context-hooks' const CHAT_PROVIDER_ID_CONTEXT_KEY = 'provider-id' as const @@ -316,13 +317,9 @@ export function ChatProvider({ null ) - const [resourceId, setResourceIdState] = useState( - defaultResourceId ?? '' - ) + const [resourceIdOverride, setResourceIdState] = useState('') - const [threadId, setThreadIdState] = useState( - defaultThreadId ?? '' - ) + const [threadIdOverride, setThreadIdState] = useState('') const [selectedModelId, setSelectedModelId] = useState('') const [isFocusMode, setFocusMode] = useState(false) @@ -335,41 +332,36 @@ export function ChatProvider({ [modelProvidersQuery.data] ) - useEffect(() => { + // Ref to track message snapshots for checkpoint restore + const messageSnapshotsRef = useRef>(new Map()) + + const resourceId = useMemo(() => { + if (resourceIdOverride.length > 0) { + return resourceIdOverride + } + if (defaultResourceId !== undefined && defaultResourceId.trim().length > 0) { - queueMicrotask(() => { - setResourceIdState(defaultResourceId) - }) - return + return defaultResourceId } - if (userId.length > 0) { - queueMicrotask(() => { - setResourceIdState(userId) - }) + return userId + }, [defaultResourceId, resourceIdOverride, userId]) + + const threadId = useMemo(() => { + if (threadIdOverride.length > 0) { + return threadIdOverride } - }, [defaultResourceId, userId]) - useEffect(() => { - if ( - defaultThreadId !== 
undefined && - defaultThreadId.trim().length > 0 - ) { - queueMicrotask(() => { - setThreadIdState(defaultThreadId) - }) - return + if (defaultThreadId !== undefined && defaultThreadId.trim().length > 0) { + return defaultThreadId } if (userId.length > 0) { - queueMicrotask(() => { - setThreadIdState(`thread:${userId}:${defaultAgent}`) - }) + return `thread:${userId}:${defaultAgent}` } - }, [defaultAgent, defaultThreadId, userId]) - // Ref to track message snapshots for checkpoint restore - const messageSnapshotsRef = useRef>(new Map()) + return '' + }, [defaultAgent, defaultThreadId, threadIdOverride, userId]) const availableModels = useMemo(() => { const modelsById = new Map() @@ -441,6 +433,7 @@ export function ChatProvider({ new DefaultChatTransport({ // Use stable endpoint - agentId passed in body, not URL path api: `${MASTRA_API_URL}/chat/${selectedAgent}`, + credentials: 'include', prepareSendMessagesRequest({ messages: outgoingMessages, requestMetadata, @@ -515,6 +508,20 @@ export function ChatProvider({ useEffect(() => { let cancelled = false + const hasIncompleteMessage = messages.some((message) => { + if (!Array.isArray(message.parts)) { + return true + } + + return message.parts.length === 0 + }) + + if (messages.length === 0 || aiStatus !== 'ready' || hasIncompleteMessage) { + return () => { + cancelled = true + } + } + void safeValidateUIMessages({ messages }).then((result) => { if (cancelled) { return @@ -530,7 +537,7 @@ export function ChatProvider({ return () => { cancelled = true } - }, [messages]) + }, [aiStatus, messages]) const aiErrorMessage = useMemo( () => (aiError ? normalizeChatError(aiError) : null), @@ -586,28 +593,10 @@ export function ChatProvider({ ? (part.callProviderMetadata as ProviderMetadata) : undefined - const googleMeta = providerMetadata?.google as - | Record - | undefined - - if (googleMeta === undefined) { - continue - } - - // Different SDKs/versions surface this under slightly different keys. 
- const candidates = [ - googleMeta.thoughtSummary, - googleMeta.thoughts, - googleMeta.thinkingSummary, - ] - - for (const candidate of candidates) { - if ( - typeof candidate === 'string' && - candidate.trim().length > 0 - ) { - return candidate - } + const summary = + extractThoughtSummaryFromProviderMetadata(providerMetadata) + if (summary.length > 0) { + return summary } } } diff --git a/app/chat/tools/page.tsx b/app/chat/tools/page.tsx index 56ab3cba..f5de57ea 100644 --- a/app/chat/tools/page.tsx +++ b/app/chat/tools/page.tsx @@ -37,6 +37,9 @@ import { SparklesIcon, WrenchIcon, } from 'lucide-react' +import { ChatPageShell } from '../components/chat-page-shell' +import { MainSidebar } from '../components/main-sidebar' +import { ChatProvider } from '../providers/chat-context' type ToolRecord = { id: string @@ -145,6 +148,14 @@ export default function ChatToolsPage() { const selectedToolDetails = toolDetailsQuery.data ?? selectedTool return ( + + } + hideHeader + contentClassName="p-0" + >
@@ -505,5 +516,7 @@ export default function ChatToolsPage() {
+
+
) -} \ No newline at end of file +} diff --git a/app/chat/user/_components/user-settings-panel.tsx b/app/chat/user/_components/user-settings-panel.tsx index 35bfd7df..5e49d2a9 100644 --- a/app/chat/user/_components/user-settings-panel.tsx +++ b/app/chat/user/_components/user-settings-panel.tsx @@ -112,6 +112,14 @@ type ProfileFormState = { image: string } +export type UserSettingsPanelSection = + | 'all' + | 'profile' + | 'security' + | 'sessions' + | 'api-keys' + | 'danger-zone' + const emptyApiKeyForm: ApiKeyFormState = { name: '', prefix: 'ak_', @@ -171,7 +179,11 @@ function badgeVariantFromBoolean(value: boolean): React.ComponentProps) { event.preventDefault() @@ -414,8 +431,10 @@ export function UserSettingsPanel() {
-
- + {showProfile || showSecurity ? ( +
+ {showProfile ? ( + Profile @@ -502,9 +521,11 @@ export function UserSettingsPanel() { Save profile - + + ) : null} - + {showSecurity ? ( + Security @@ -629,10 +650,13 @@ export function UserSettingsPanel() { Sign out only - -
+
+ ) : null} +
+ ) : null} - + {showSessions ? ( + Live sessions @@ -704,9 +728,11 @@ export function UserSettingsPanel() { - + + ) : null} - + {showApiKeys ? ( + API keys @@ -1016,9 +1042,11 @@ export function UserSettingsPanel() { - + + ) : null} - + {showDangerZone ? ( + Danger zone @@ -1064,10 +1092,12 @@ export function UserSettingsPanel() { - + + ) : null} - !open && setEditingKey(null)}> + {showApiKeys ? ( + !open && setEditingKey(null)}> Edit API key @@ -1192,7 +1222,8 @@ export function UserSettingsPanel() { ) : null} - + + ) : null} ) } diff --git a/app/chat/user/api-keys/page.tsx b/app/chat/user/api-keys/page.tsx new file mode 100644 index 00000000..3e78a6c5 --- /dev/null +++ b/app/chat/user/api-keys/page.tsx @@ -0,0 +1,5 @@ +import { UserSettingsPanel } from '../_components/user-settings-panel' + +export default function UserApiKeysSettingsPage() { + return +} diff --git a/app/chat/user/danger-zone/page.tsx b/app/chat/user/danger-zone/page.tsx new file mode 100644 index 00000000..c16d0bd9 --- /dev/null +++ b/app/chat/user/danger-zone/page.tsx @@ -0,0 +1,5 @@ +import { UserSettingsPanel } from '../_components/user-settings-panel' + +export default function UserDangerZoneSettingsPage() { + return +} diff --git a/app/chat/user/layout.tsx b/app/chat/user/layout.tsx new file mode 100644 index 00000000..67608822 --- /dev/null +++ b/app/chat/user/layout.tsx @@ -0,0 +1,54 @@ +'use client' + +import type { ReactNode } from 'react' + +import { ChatSettingsShell } from '../components/chat-settings-shell' + +const userSettingsSections = [ + { + href: '/chat/user', + title: 'Overview', + description: 'Start from the account summary and jump to the right settings surface.', + }, + { + href: '/chat/user/profile', + title: 'Profile', + description: 'Edit your name, username, and avatar identity.', + }, + { + href: '/chat/user/security', + title: 'Security', + description: 'Change your password, send resets, and manage sign-out posture.', + }, + { + href: '/chat/user/sessions', + 
title: 'Sessions', + description: 'Inspect active devices and revoke live sessions.', + }, + { + href: '/chat/user/api-keys', + title: 'API keys', + description: 'Issue, rotate, and revoke account-scoped API keys.', + }, + { + href: '/chat/user/danger-zone', + title: 'Danger zone', + description: 'Handle irreversible account deletion controls.', + }, +] as const + +export default function UserSettingsLayout({ + children, +}: { + children: ReactNode +}) { + return ( + + {children} + + ) +} diff --git a/app/chat/user/page.tsx b/app/chat/user/page.tsx index 6bd82815..be9b0c66 100644 --- a/app/chat/user/page.tsx +++ b/app/chat/user/page.tsx @@ -1,18 +1,69 @@ -import { ChatPageShell } from '../components/chat-page-shell' -import { MainSidebar } from '../components/main-sidebar' -import { UserSettingsPanel } from './_components/user-settings-panel' +import Link from 'next/link' + +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/ui/card' +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from '@/ui/tooltip' /** - * User settings page for the signed-in chat account. + * User settings overview for the signed-in chat account. */ export default function UserPage() { + const sections = [ + { + href: '/chat/user/profile', + title: 'Profile', + description: 'Update your display name, username, and avatar.', + }, + { + href: '/chat/user/security', + title: 'Security', + description: 'Rotate your password and control your sign-out posture.', + }, + { + href: '/chat/user/sessions', + title: 'Sessions', + description: 'Inspect and revoke active devices tied to your account.', + }, + { + href: '/chat/user/api-keys', + title: 'API keys', + description: 'Create and manage account-scoped API keys.', + }, + { + href: '/chat/user/danger-zone', + title: 'Danger zone', + description: 'Handle irreversible account deletion actions.', + }, + ] as const + return ( - } - > - - + +
+ {sections.map((section) => ( + + + + + + {section.title} + {section.description} + + + Open the focused {section.title.toLowerCase()} route. + + + + + + {section.description} + + + ))} +
+
) } diff --git a/app/chat/user/profile/page.tsx b/app/chat/user/profile/page.tsx new file mode 100644 index 00000000..7129ee99 --- /dev/null +++ b/app/chat/user/profile/page.tsx @@ -0,0 +1,5 @@ +import { UserSettingsPanel } from '../_components/user-settings-panel' + +export default function UserProfileSettingsPage() { + return +} diff --git a/app/chat/user/security/page.tsx b/app/chat/user/security/page.tsx new file mode 100644 index 00000000..d11e9028 --- /dev/null +++ b/app/chat/user/security/page.tsx @@ -0,0 +1,5 @@ +import { UserSettingsPanel } from '../_components/user-settings-panel' + +export default function UserSecuritySettingsPage() { + return +} diff --git a/app/chat/user/sessions/page.tsx b/app/chat/user/sessions/page.tsx new file mode 100644 index 00000000..a8b73143 --- /dev/null +++ b/app/chat/user/sessions/page.tsx @@ -0,0 +1,5 @@ +import { UserSettingsPanel } from '../_components/user-settings-panel' + +export default function UserSessionsSettingsPage() { + return +} diff --git a/app/chat/workflows/[workflowId]/page.tsx b/app/chat/workflows/[workflowId]/page.tsx index c0c1e382..a8721375 100644 --- a/app/chat/workflows/[workflowId]/page.tsx +++ b/app/chat/workflows/[workflowId]/page.tsx @@ -36,6 +36,9 @@ import { RefreshCwIcon, WorkflowIcon, } from 'lucide-react' +import { ChatPageShell } from '../../components/chat-page-shell' +import { MainSidebar } from '../../components/main-sidebar' +import { ChatProvider } from '../../providers/chat-context' type WorkflowRunRecord = Record @@ -128,6 +131,14 @@ export default function WorkflowDetailPage() { ) return ( + + } + hideHeader + contentClassName="p-0" + >
@@ -399,5 +410,7 @@ export default function WorkflowDetailPage() {
+
+
) -} \ No newline at end of file +} diff --git a/app/chat/workflows/page.tsx b/app/chat/workflows/page.tsx index afabbd3b..c28db423 100644 --- a/app/chat/workflows/page.tsx +++ b/app/chat/workflows/page.tsx @@ -22,6 +22,9 @@ import { GitBranchIcon, PanelRightCloseIcon, } from 'lucide-react' +import { ChatPageShell } from '../components/chat-page-shell' +import { MainSidebar } from '../components/main-sidebar' +import { ChatProvider } from '../providers/chat-context' interface WorkflowRecord { id?: string @@ -67,6 +70,14 @@ export default function ChatWorkflowsPage() { }, [query, workflows]) return ( + + } + hideHeader + contentClassName="p-0" + >
@@ -216,5 +227,7 @@ export default function ChatWorkflowsPage() {
+
+
) } diff --git a/app/chat/workspaces/page.tsx b/app/chat/workspaces/page.tsx index dfd8387c..eb372ba5 100644 --- a/app/chat/workspaces/page.tsx +++ b/app/chat/workspaces/page.tsx @@ -1,6 +1,6 @@ 'use client' -import { useMemo, useState } from 'react' +import { Suspense, useEffect, useMemo, useState } from 'react' import type { SkillMetadata, @@ -25,6 +25,9 @@ import { useWorkspaceSkills, useWorkspaces, } from '@/lib/hooks/use-mastra-query' +import { ChatPageShell } from '@/app/chat/components/chat-page-shell' +import { MainSidebar } from '@/app/chat/components/main-sidebar' +import { ChatProvider } from '@/app/chat/providers/chat-context' import { Badge } from '@/ui/badge' import { Button } from '@/ui/button' import { Card, CardContent, CardHeader, CardTitle } from '@/ui/card' @@ -165,6 +168,24 @@ function parentWorkspacePath(path: string): string { * Explorer for live workspace metadata, skills, and sandbox files. */ export default function WorkspacesPage() { + return ( + + + } + contentClassName="p-0" + hideHeader + > + + + + + ) +} + +function WorkspacesPageContent() { const workspacesQuery = useWorkspaces() const [showHelpPanel, setShowHelpPanel] = useState(true) const [workspaceSearch, setWorkspaceSearch] = useState('') @@ -185,7 +206,7 @@ export default function WorkspacesPage() { const [terminalOutput, setTerminalOutput] = useState('') const workspaces = useMemo( - () => normalizeCollection(workspacesQuery.data, 'workspaces'), + () => workspacesQuery.data ?? [], [workspacesQuery.data] ) @@ -328,6 +349,35 @@ export default function WorkspacesPage() { ] ) + useEffect(() => { + const fallbackWorkspaceId = filteredWorkspaces[0]?.id ?? workspaces[0]?.id ?? 
'' + const workspaceStillExists = + selectedWorkspaceId.length === 0 || + workspaces.some((workspace) => workspace.id === selectedWorkspaceId) + + if (workspaceStillExists && selectedWorkspaceId.length > 0) { + return + } + + queueMicrotask(() => { + setSelectedWorkspaceId(fallbackWorkspaceId) + }) + }, [filteredWorkspaces, selectedWorkspaceId, workspaces]) + + useEffect(() => { + queueMicrotask(() => { + setSelectedSkillName('') + setFilesystemPath('/') + setSelectedEntryPath('') + setSelectedEntryType('') + setEditorContent('') + setNewFilePath('') + setNewFileContent('') + setNewFolderPath('') + setFilesystemSearch('') + }) + }, [activeWorkspaceId]) + return (
@@ -1388,4 +1438,4 @@ export default function WorkspacesPage() {
) -} \ No newline at end of file +} diff --git a/app/login/page.tsx b/app/login/page.tsx index c1c019ba..31c48ed7 100644 --- a/app/login/page.tsx +++ b/app/login/page.tsx @@ -3,10 +3,21 @@ import Link from 'next/link' import type { Route } from 'next' import { useRouter, useSearchParams } from 'next/navigation' -import { useEffect, useMemo, useState, type SyntheticEvent } from 'react' +import { + Suspense, + useEffect, + useMemo, + useState, + type SyntheticEvent, +} from 'react' import { Eye, EyeOff, Loader2, LogIn, ShieldCheck, Sparkles } from 'lucide-react' -import { authClient } from '@/lib/auth-client' +import { + authClient, + hasGoogleOneTapClient, + signInWithUsername, + startGoogleOneTap, +} from '@/lib/auth-client' import { useAuthQuery } from '@/lib/hooks/use-auth-query' import { Button } from '@/ui/button' import { Card, CardContent, CardDescription, CardFooter, CardHeader, CardTitle } from '@/ui/card' @@ -25,10 +36,22 @@ function getSafeNextPath(next: string | null): Route { const REMEMBERED_IDENTIFIER_KEY = 'agentstack.auth.remembered-identifier' type LoginSubmitEvent = SyntheticEvent -export default function LoginPage() { +function LoginPageFallback() { + return ( +
+
+ + Loading sign-in... +
+
+ ) +} + +function LoginPageContent() { const router = useRouter() const searchParams = useSearchParams() const authQuery = useAuthQuery() + const isHydrated = true const [isLoading, setIsLoading] = useState(false) const [errorMessage, setErrorMessage] = useState('') const [identifier, setIdentifier] = useState('') @@ -45,6 +68,10 @@ export default function LoginPage() { const canSubmit = normalizedIdentifier.length > 0 && password.length > 0 && !isLoading useEffect(() => { + if (!isHydrated) { + return + } + if (authQuery.isPending) { return } @@ -52,28 +79,21 @@ export default function LoginPage() { if (authQuery.data) { router.replace(nextPath) } - }, [authQuery.data, authQuery.isPending, nextPath, router]) + }, [authQuery.data, authQuery.isPending, isHydrated, nextPath, router]) useEffect(() => { - if (authQuery.isPending || authQuery.data) { + if (!isHydrated) { return } - void authClient.oneTap({ + if (authQuery.isPending || authQuery.data || !hasGoogleOneTapClient) { + return + } + + void startGoogleOneTap({ callbackURL: nextPath, }) - }, [authQuery.data, authQuery.isPending, nextPath]) - - useEffect(() => { - const savedIdentifier = window.localStorage.getItem(REMEMBERED_IDENTIFIER_KEY) - - if (savedIdentifier) { - queueMicrotask(() => { - setIdentifier(savedIdentifier) - setRememberIdentifier(true) - }) - } - }, []) + }, [authQuery.data, authQuery.isPending, isHydrated, nextPath]) /** Starts the Google OAuth flow through Better Auth. */ const handleGoogleSignIn = async () => { @@ -115,7 +135,7 @@ export default function LoginPage() { password, callbackURL: nextPath, }) - : await authClient.signIn.username({ + : await signInWithUsername({ username: normalizedIdentifier, password, callbackURL: nextPath, @@ -131,7 +151,7 @@ export default function LoginPage() { router.replace(nextPath) } - if (authQuery.isPending || authQuery.data) { + if (!isHydrated || authQuery.isPending || authQuery.data) { return (
@@ -366,3 +386,11 @@ export default function LoginPage() {
) } + +export default function LoginPage() { + return ( + }> + + + ) +} diff --git a/app/login/signup/page.tsx b/app/login/signup/page.tsx index a4c96a36..1acb3ac6 100644 --- a/app/login/signup/page.tsx +++ b/app/login/signup/page.tsx @@ -3,10 +3,21 @@ import Link from 'next/link' import type { Route } from 'next' import { useRouter, useSearchParams } from 'next/navigation' -import { useEffect, useMemo, useState, type SyntheticEvent } from 'react' +import { + Suspense, + useEffect, + useMemo, + useState, + type SyntheticEvent, +} from 'react' import { Eye, EyeOff, Loader2, ShieldCheck, Sparkles, UserPlus } from 'lucide-react' -import { authClient } from '@/lib/auth-client' +import { + authClient, + hasGoogleOneTapClient, + signUpWithUsername, + startGoogleOneTap, +} from '@/lib/auth-client' import { useAuthQuery } from '@/lib/hooks/use-auth-query' import { Button } from '@/ui/button' import { Card, CardContent, CardDescription, CardFooter, CardHeader, CardTitle } from '@/ui/card' @@ -22,10 +33,22 @@ function getSafeNextPath(next: string | null): Route { return '/chat' } -export default function SignupPage() { +function SignupPageFallback() { + return ( +
+
+ + Loading sign-up... +
+
+ ) +} + +function SignupPageContent() { const router = useRouter() const searchParams = useSearchParams() const authQuery = useAuthQuery() + const isHydrated = true const [isLoading, setIsLoading] = useState(false) const [errorMessage, setErrorMessage] = useState('') const [name, setName] = useState('') @@ -54,6 +77,10 @@ export default function SignupPage() { !isLoading useEffect(() => { + if (!isHydrated) { + return + } + if (authQuery.isPending) { return } @@ -61,17 +88,21 @@ export default function SignupPage() { if (authQuery.data) { router.replace(nextPath) } - }, [authQuery.data, authQuery.isPending, nextPath, router]) + }, [authQuery.data, authQuery.isPending, isHydrated, nextPath, router]) useEffect(() => { - if (authQuery.isPending || authQuery.data) { + if (!isHydrated) { return } - void authClient.oneTap({ + if (authQuery.isPending || authQuery.data || !hasGoogleOneTapClient) { + return + } + + void startGoogleOneTap({ callbackURL: nextPath, }) - }, [authQuery.data, authQuery.isPending, nextPath]) + }, [authQuery.data, authQuery.isPending, isHydrated, nextPath]) /** Starts the Google OAuth flow through Better Auth. */ const handleGoogleSignIn = async () => { @@ -119,7 +150,7 @@ export default function SignupPage() { setIsLoading(true) setErrorMessage('') - const response = await authClient.signUp.email({ + const response = await signUpWithUsername({ name: normalizedName, username: normalizedUsername, email: normalizedEmail, @@ -137,7 +168,7 @@ export default function SignupPage() { router.replace(nextPath) } - if (authQuery.isPending || authQuery.data) { + if (!isHydrated || authQuery.isPending || authQuery.data) { return (
@@ -428,3 +459,11 @@ export default function SignupPage() {
) } + +export default function SignupPage() { + return ( + }> + + + ) +} diff --git a/app/networks/providers/network-context.tsx b/app/networks/providers/network-context.tsx index fb42d812..058a0097 100644 --- a/app/networks/providers/network-context.tsx +++ b/app/networks/providers/network-context.tsx @@ -272,6 +272,7 @@ export function NetworkProvider({ } = useChat({ transport: new DefaultChatTransport({ api: `${MASTRA_API_URL}/network/${selectedNetwork}`, + credentials: 'include', prepareSendMessagesRequest({ messages: msgs }) { const last = msgs[msgs.length - 1] const textPart = last?.parts?.find( diff --git a/app/workflows/providers/workflow-context.tsx b/app/workflows/providers/workflow-context.tsx index 6200f71e..36285c2f 100644 --- a/app/workflows/providers/workflow-context.tsx +++ b/app/workflows/providers/workflow-context.tsx @@ -354,6 +354,7 @@ export function WorkflowProvider({ const { messages, sendMessage, stop, status } = useChat({ transport: new DefaultChatTransport({ api: `${MASTRA_API_URL}/workflow/${selectedWorkflow}`, + credentials: 'include', prepareSendMessagesRequest({ messages: msgs }) { const last = msgs[msgs.length - 1] const textPart = last?.parts?.find( diff --git a/lib/auth-client.ts b/lib/auth-client.ts index 3ef5a3ed..b389bfdc 100644 --- a/lib/auth-client.ts +++ b/lib/auth-client.ts @@ -1,5 +1,6 @@ import { createAuthClient } from 'better-auth/react' import { + type GoogleOneTapActionOptions, adminClient, multiSessionClient, oneTapClient, @@ -8,28 +9,60 @@ import { import { apiKeyClient } from '@better-auth/api-key/client' //import { agentAuthClient } from "@better-auth/agent-auth/client"; +const authBaseUrl = + process.env.NEXT_PUBLIC_BETTER_AUTH_URL ?? 
'http://localhost:3000' +const publicGoogleClientId = process.env.NEXT_PUBLIC_GOOGLE_CLIENT_ID?.trim() +export const hasGoogleOneTapClient = Boolean(publicGoogleClientId) + export const authClient = createAuthClient({ plugins: [ adminClient(), apiKeyClient(), multiSessionClient(), oneTapClient({ - clientId: process.env.GOOGLE_CLIENT_ID ?? 'your-google-client-id', + clientId: publicGoogleClientId ?? 'missing-google-client-id', autoSelect: true, context: 'signin', uxMode: 'redirect', additionalOptions: { - // Any extra options for the Google initialize method + // Any extra options for the Google initialize method }, - // Configure prompt behavior and exponential backoff: promptOptions: { - baseDelay: 1000, // Base delay in ms (default: 1000) - maxAttempts: 5 // Maximum number of attempts before triggering onPromptNotification (default: 5) - } + baseDelay: 1000, + maxAttempts: 5, + }, }), usernameClient(), // agentAuthClient(), ], - baseURL: process.env.NEXT_PUBLIC_BETTER_AUTH_URL ?? 'http://localhost:3000', + baseURL: authBaseUrl, credentials: 'include', }) + +export async function startGoogleOneTap( + options?: GoogleOneTapActionOptions +) { + if (!hasGoogleOneTapClient) { + return + } + + await authClient.oneTap(options) +} + +export async function signInWithUsername(input: { + username: string + password: string + callbackURL?: string +}) { + return authClient.signIn.username(input) +} + +export async function signUpWithUsername(input: { + name: string + username: string + email: string + password: string + callbackURL?: string +}) { + return authClient.signUp.email(input) +} diff --git a/lib/hooks/use-mastra-query.ts b/lib/hooks/use-mastra-query.ts index f510854e..fcd151c1 100644 --- a/lib/hooks/use-mastra-query.ts +++ b/lib/hooks/use-mastra-query.ts @@ -102,9 +102,9 @@ import type { WorkspaceFsListResponse, WorkspaceFsReadResponse, WorkspaceFsStatResponse, - ListWorkspacesResponse, WorkspaceIndexParams, WorkspaceInfoResponse, + WorkspaceItem, 
WorkspaceSearchParams, WorkspaceSearchResponse, WorkflowRunResult, @@ -173,6 +173,7 @@ type TracesResponse = CoreListTracesResponse type TraceTrajectoryResponse = Trajectory type ObservabilityLogsResponse = CoreListLogsResponse type VectorIndex = GetVectorIndexResponse & { name: string } +export const DEFAULT_VECTOR_STORE_NAME = 'libsqlvector' as const type McpToolExecuteArgs = Parameters< ReturnType['execute'] >[0] @@ -1838,7 +1839,7 @@ export const useLogTransports: () => UseQueryResult = () => export const useVectorIndexes: ( vectorName?: string -) => UseQueryResult = (vectorName = 'pgVector') => +) => UseQueryResult = (vectorName = DEFAULT_VECTOR_STORE_NAME) => useQuery({ queryKey: mastraQueryKeys.vectors.indexes(vectorName), queryFn: async () => { @@ -1876,9 +1877,12 @@ export const useEmbedders = () => // --- WORKSPACES --- export const useWorkspaces = () => - useQuery({ + useQuery({ queryKey: mastraQueryKeys.workspaces.list(), - queryFn: () => mastraClient.listWorkspaces(), + queryFn: async () => { + const result = await mastraClient.listWorkspaces() + return Array.isArray(result) ? result : result.workspaces ?? [] + }, }) export const useWorkspace = (id: string) => diff --git a/lib/hooks/use-persistent-store.ts b/lib/hooks/use-persistent-store.ts index 53728cbe..409b9451 100644 --- a/lib/hooks/use-persistent-store.ts +++ b/lib/hooks/use-persistent-store.ts @@ -107,13 +107,16 @@ export function usePersistentStore({ } const handleStorage = (event: StorageEvent) => { - if (event.key !== key || event.newValue === null) { + const newValue = event.newValue + + if (event.key !== key || newValue === null) { return } try { const parse = deserialize ?? defaultDeserialize - store.setState(parse(event.newValue)) + // Use functional updater to satisfy Store.setState overload expecting a function + store.setState(() => parse(newValue)) } catch { // Ignore malformed storage payloads. 
} @@ -135,7 +138,7 @@ export function usePersistentStore({ ) const resetValue = useCallback(() => { - store.setState(initialValue) + store.setState(() => initialValue) }, [initialValue, store]) return { diff --git a/memory-bank/activeContext.md b/memory-bank/activeContext.md index 4d2c622c..9e926c48 100644 --- a/memory-bank/activeContext.md +++ b/memory-bank/activeContext.md @@ -1,3 +1,77 @@ +# Active Context Update (2026-04-15 - chat settings routing and workspace hook cleanup) + +- `app/chat/user/*` and `app/chat/admin/*` now use route-level layouts (`layout.tsx`) built on `app/chat/components/chat-settings-shell.tsx`, which centralizes `ChatProvider`, `ChatPageShell`, and `MainSidebar` for modular settings routes. +- `/chat/user` and `/chat/admin` are now overview landing pages, and focused settings subpages exist for: + - `/chat/user/profile` + - `/chat/user/security` + - `/chat/user/sessions` + - `/chat/user/api-keys` + - `/chat/user/danger-zone` + - `/chat/admin/runtime` + - `/chat/admin/users` +- `UserSettingsPanel` and `AdminSettingsPanel` now accept a section prop so the new routes can render focused slices of the existing Better Auth management surfaces instead of duplicating mutation logic. +- `lib/hooks/use-mastra-query.ts` now normalizes `useWorkspaces()` to return `WorkspaceItem[]` directly, which removed duplicate raw-response normalization from `app/chat/workspaces/page.tsx` and `app/chat/components/chat-sidebar.tsx`. +- The active vector-store default in the chat-facing hook layer remains `libsqlvector`. 
+- Additional chat routes now also run inside the shared chat shell (`ChatProvider` + `ChatPageShell` + `MainSidebar`) instead of bypassing the sidebar: + - `app/chat/dataset/page.tsx` + - `app/chat/evaluation/page.tsx` + - `app/chat/observability/page.tsx` + - `app/chat/tools/page.tsx` + - `app/chat/logs/page.tsx` + - `app/chat/harness/page.tsx` + - `app/chat/mcp-a2a/page.tsx` + - `app/chat/workflows/page.tsx` + - `app/chat/workflows/[workflowId]/page.tsx` +- The last chat-facing `PgVector` label in `app/chat/config/agents.ts` was replaced with vector-store-neutral wording. +- Shared chat UX was further refined: + - `app/chat/components/main-sidebar.tsx` now uses denser route metadata, tooltip guidance, and `ScrollArea` for long page/thread lists. + - `app/chat/components/chat-settings-shell.tsx` now uses horizontal scrolling plus tooltip-backed section cards for settings navigation. + - `app/chat/components/chat-page-shell.tsx` now uses tighter responsive shell spacing. + - `app/chat/user/page.tsx` and `app/chat/admin/page.tsx` now use tooltip-backed overview cards. +- Targeted IDE diagnostics are clean for the updated settings routes, panels, workspace page, chat sidebar, and Mastra hook file. +- `app/chat/components/main-sidebar.tsx` still shows a stale ESLint diagnostic in the editor even after the flagged line no longer contains any effect or state-setting logic; the current file contents suggest this is a cached lint/server issue rather than a live code problem. + +# Active Context Update (2026-04-15 - research agent model fallback) + +- `src/mastra/agents/researchAgent.ts` no longer pins the route to `google/gemma-4-31b-it:free`. 
+- The research agent now uses role-aware runtime model selection: + - admin requests use `google.chat('gemini-3.1-pro-preview')` + - standard requests use `google.chat('gemini-3.1-flash-lite-preview')` +- This aligns `researchAgent` with the repo's production-oriented research-agent pattern and avoids the failing free-model default that was breaking `/chat/agents/researchAgent`. + +# Active Context Update (2026-04-15 - supervisor split, GitHub channel, browser hooks) + +- The shared supervisor scorer layer is now split into two tiers: + - `createSupervisorPatternScorer(...)` remains the lower-level coordinator primitive + - `createSupervisorAgentPatternScorer(...)` is the higher-level shared helper for supervisor-style agents +- Additional future-facing shared helpers now exist for channel-oriented and structured-output-oriented supervisors: + - `createSupervisorChannelPatternScorer(...)` + - `createStructuredOutputSupervisorPatternScorer(...)` +- `src/mastra/browsers.ts` is now the central browser policy surface for: + - deterministic `agentBrowser` + - adaptive `stagehandBrowser` + - lifecycle logging hooks + - environment-driven viewport, timeout, and screencast settings +- `browserAgent` now has a much stricter verification prompt focused on evidence, deterministic tool sequencing, and non-destructive browsing. +- `researchAgent` now supports an optional GitHub channel adapter when `GITHUB_WEBHOOK_SECRET` plus PAT or GitHub App credentials are configured; Discord remains enabled as before. +- `researchAgent` channel handling now uses valid Mastra handler overrides (`onDirectMessage`, `onMention`, `onSubscribedMessage`) instead of a non-existent per-platform `github` handler key. +- The subscribed-thread path intentionally ignores acknowledgement-only follow-ups to reduce wasted research cycles in long channel threads. 
+- The hook layer is now centralized through `handleResearchChannelEvent(...)`, which adds consistent logging and GitHub-thread awareness to the research channel surface. +- Better Auth Google sign-in now routes through the correct Better Auth callback path (`/api/auth/callback/google`), and the auth client no longer depends on client-side access to private env vars. +- The login and signup routes now wrap their `useSearchParams()` usage in `Suspense`, which cleared the Next.js 16 blocking-route runtime error and made browser-based auth testing viable again. +- CLI lint/test execution is still blocked in this session because `pwsh` is unavailable, so browser/research validation relied on targeted IDE diagnostics. + +# Active Context Update (2026-04-15 - supervisor/browser rollout) + +- `src/mastra/scorers/supervisor-scorers.ts` now exposes `createSupervisorPatternScorer(...)` as the shared primitive for supervisor/coordinator completion scoring. +- The current supervisor-style agent set and coordinator-network set now use local scorer wrappers on top of that shared primitive instead of duplicating the full scorer preprocessing pipeline in each file. +- `browserAgent` is now part of the main supervisor surface: + - exported from `src/mastra/agents/index.ts` + - registered in `src/mastra/index.ts` + - mounted in `src/mastra/agents/supervisor-agent.ts` +- `supervisor-agent` delegation guidance now treats browser work as an opt-in verification path for live claims, page behavior, and browser-state evidence rather than a default research step. +- Validation could not be executed from the CLI runtime in this session because `pwsh` is unavailable, so follow-up validation should be run in a shell-enabled environment. + # Active Context Update (2026-04-14 - strict typing and inferred tool cleanup) - `BinanceAvgPrice` is now used in the Binance spot tool via the `BinanceSpotAvgPriceData` type. 
@@ -1276,3 +1350,13 @@ Added Vercel-style navigation and footer to `app/page.tsx`: 5. Add loading skeletons throughout 6. Type MastraClient responses properly 7. Add unit tests for hooks +# Active Context Update (2026-04-15 - chat route hardening) + +- `app/chat/components/chat.utils.ts` now exposes provider-agnostic thought-summary extraction so chat surfaces no longer assume `providerMetadata.google`. +- `app/chat/components/chat-messages.tsx` and `app/chat/providers/chat-context.tsx` now tolerate arbitrary provider metadata shapes and suppress validation while streamed assistant messages are still incomplete. +- `app/chat/providers/chat-context.tsx` no longer raises a false `Messages array must not be empty` error on empty initial chat state. +- `app/chat/providers/chat-context.tsx`, `app/networks/providers/network-context.tsx`, `app/workflows/providers/workflow-context.tsx`, and `app/chat/components/nested-agent-chat.tsx` now set `credentials: 'include'` on `DefaultChatTransport` so the frontend can authenticate to the cross-origin Mastra server on `http://localhost:4111`. +- `/chat/agents/researchAgent` was reproduced in a real authenticated browser session by creating a Better Auth test user directly against `/api/auth/sign-up/email`. +- The protected research-agent route no longer reproduces the original provider-metadata crash or the initial empty-message error; direct browser fetches confirmed the Mastra backend now returns a valid SSE stream start and tool-input chunks when credentials are included. +- `app/login/page.tsx`, `app/login/signup/page.tsx`, and `app/chat/components/main-sidebar.tsx` now use hydration guards so client-only auth/session/thread UI does not mismatch server HTML during hydration. +- Final browser re-verification after the last chat-message validation patch is blocked in this session because the local Next.js dev server stopped and this environment cannot restart it without `pwsh` or another shell tool. 
diff --git a/memory-bank/progress.md b/memory-bank/progress.md index 64f5deea..b6ef8c58 100644 --- a/memory-bank/progress.md +++ b/memory-bank/progress.md @@ -1,3 +1,104 @@ +# Progress Update (2026-04-15 - modular chat settings and workspace hook normalization) + +- Added a shared `app/chat/components/chat-settings-shell.tsx` wrapper so route-level settings pages consistently use `ChatProvider`, `ChatPageShell`, and `MainSidebar`. +- Split settings navigation into overview + focused routes: + - user: `profile`, `security`, `sessions`, `api-keys`, `danger-zone` + - admin: `runtime`, `users` +- Updated `app/chat/user/_components/user-settings-panel.tsx` and `app/chat/admin/_components/admin-management-panel.tsx` to accept section props instead of duplicating Better Auth logic across new pages. +- Normalized `useWorkspaces()` in `lib/hooks/use-mastra-query.ts` to return `WorkspaceItem[]`, then removed duplicate raw workspace-response decoding from: + - `app/chat/workspaces/page.tsx` + - `app/chat/components/chat-sidebar.tsx` +- Brought the remaining major chat dashboard surfaces under the shared shell/sidebar composition: + - `dataset` + - `evaluation` + - `observability` + - `tools` + - `logs` + - `harness` + - `mcp-a2a` + - `workflows` + - `workflows/[workflowId]` +- Removed the last chat-facing `PgVector` wording from `app/chat/config/agents.ts`. 
+- Improved shared UI/UX/cx on the core shell surfaces: + - `main-sidebar.tsx`: added route descriptions, tooltip guidance, and scroll containers for long lists + - `chat-settings-shell.tsx`: added horizontal scroll support and tooltips for section navigation + - `chat-page-shell.tsx`: tightened responsive shell spacing + - `user/page.tsx` and `admin/page.tsx`: added tooltip-backed overview cards +- Validation: + - ✅ targeted IDE diagnostics are clean for: + - `app/chat/components/chat-settings-shell.tsx` + - `app/chat/user/layout.tsx` + - `app/chat/admin/layout.tsx` + - `app/chat/user/page.tsx` + - `app/chat/admin/page.tsx` + - `app/chat/user/_components/user-settings-panel.tsx` + - `app/chat/admin/_components/admin-management-panel.tsx` + - `app/chat/workspaces/page.tsx` + - `app/chat/components/chat-sidebar.tsx` + - `lib/hooks/use-mastra-query.ts` + - `app/chat/dataset/page.tsx` + - `app/chat/evaluation/page.tsx` + - `app/chat/observability/page.tsx` + - `app/chat/tools/page.tsx` + - `app/chat/logs/page.tsx` + - `app/chat/harness/page.tsx` + - `app/chat/mcp-a2a/page.tsx` + - `app/chat/workflows/page.tsx` + - `app/chat/workflows/[workflowId]/page.tsx` + - `app/chat/config/agents.ts` + - `app/chat/components/chat-page-shell.tsx` + - `app/chat/components/chat-settings-shell.tsx` + - `app/chat/user/page.tsx` + - `app/chat/admin/page.tsx` + - ⚠️ `app/chat/components/main-sidebar.tsx` still shows a stale ESLint diagnostic in the editor even though the flagged line no longer contains effect-driven state logic. + +# Progress Update (2026-04-15 - research agent model default repair) + +- Replaced the hard-coded `google/gemma-4-31b-it:free` model in `src/mastra/agents/researchAgent.ts`. 
+- `researchAgent` now uses a role-aware runtime model selector: + - admin → `google.chat('gemini-3.1-pro-preview')` + - default → `google.chat('gemini-3.1-flash-lite-preview')` +- Validation: + - ✅ targeted VS Code error check on `src/mastra/agents/researchAgent.ts` + +# Progress Update (2026-04-15 - browser and channel hardening) + +- Added a second shared scorer layer for supervisor-style agents in `src/mastra/scorers/supervisor-scorers.ts`: + - `createSupervisorAgentPatternScorer(...)` + - `createSupervisorChannelPatternScorer(...)` + - `createStructuredOutputSupervisorPatternScorer(...)` +- Migrated the active supervisor-style agents to the supervisor-specific shared scorer helper instead of the lower-level base export. +- Hardened `src/mastra/browsers.ts` with: + - environment-driven viewport/timeout/screencast settings + - lifecycle hooks for both deterministic and Stagehand providers + - stronger Stagehand operating instructions +- Upgraded `src/mastra/agents/browserAgent.ts` with a production-grade verification contract and deterministic browser operating workflow. +- Added optional GitHub channel support to `src/mastra/agents/researchAgent.ts`, gated behind the required webhook/auth environment variables so startup remains safe when GitHub is not configured. +- Replaced the invalid per-platform `channels.handlers.github` attempt in `researchAgent` with valid Mastra channel handlers: + - `onDirectMessage` + - `onMention` + - `onSubscribedMessage` +- The subscribed-thread handler now skips acknowledgement-only follow-ups such as `thanks`, `resolved`, or `lgtm` instead of spending another research turn on low-signal churn. +- Strengthened the same handler layer with a shared `handleResearchChannelEvent(...)` helper, GitHub thread detection, and consistent metadata logging across DM, mention, and subscribed-thread events. 
+- Hardened Better Auth Google wiring by normalizing legacy callback env values onto `/api/auth/callback/google`, tightening client/plugin usage in `lib/auth-client.ts`, and fixing the `/login` + `/login/signup` Suspense boundary issue so the auth pages render cleanly under Next.js 16. +- Enriched `src/mastra/browsers.ts` browser hooks so launch/close events now include connection mode, runtime config, viewport, screencast, environment, and session duration metadata, plus Browserbase credential guardrails. +- Validation: + - ✅ targeted IDE diagnostics are clean for `src/mastra/scorers/supervisor-scorers.ts` + - ✅ targeted IDE diagnostics are clean for `src/mastra/browsers.ts` + - ✅ targeted IDE diagnostics are clean for `src/mastra/agents/browserAgent.ts` + - ⚠️ CLI lint/test validation remains blocked in this session because the runtime shell requires `pwsh`, which is not installed. + +# Progress Update (2026-04-15 - supervisor and coordinator scorer standardization) + +- Added a reusable `createSupervisorPatternScorer(...)` primitive in `src/mastra/scorers/supervisor-scorers.ts` and migrated the active supervisor-style agents plus coordinator networks to local wrappers built on that shared scorer pipeline. +- Wired `browserAgent` into the main `supervisor-agent` surface end to end: + - exported from `src/mastra/agents/index.ts` + - registered in `src/mastra/index.ts` + - mounted as a child agent in `src/mastra/agents/supervisor-agent.ts` +- Tightened `supervisor-agent` delegation guidance so browser work is used only for high-value live verification rather than as a default hop. +- Validation: + - ⚠️ CLI validation commands were blocked in this session because the runtime shell requires `pwsh`, which is not installed in the environment. + # Progress Update (2026-04-14 - strict typing and inferred tool cleanup) - Used `BinanceAvgPrice` in the Binance tool instead of leaving it as an unused helper import. 
@@ -905,3 +1006,24 @@ - Requires correct environment configuration (database connection, model API keys, financial API keys, `PHOENIX_ENDPOINT`/`PHOENIX_API_KEY`/`PHOENIX_PROJECT_NAME`, etc.) to exercise all capabilities. - A2A coordination complexity grows with new agents; needs careful documentation and evaluation to avoid misalignment. - JWT auth is currently stubbed; until verification is implemented and policies are enforced, flows that depend on strict auth should be treated as experimental. +# Progress Update (2026-04-15 - chat route auth and metadata hardening) + +- Hardened chat-provider metadata handling so the chat UI no longer assumes a Google-specific provider payload: + - `app/chat/components/chat.utils.ts` + - `app/chat/components/chat-messages.tsx` + - `app/chat/providers/chat-context.tsx` +- Fixed the false initial empty-chat validation error in `app/chat/providers/chat-context.tsx`. +- Added `credentials: 'include'` to all `DefaultChatTransport` instances that talk to the Mastra server so authenticated frontend requests can reach `http://localhost:4111`: + - chat + - networks + - workflows + - nested agent demo +- Fixed hydration mismatches on: + - `app/login/page.tsx` + - `app/login/signup/page.tsx` + - `app/chat/components/main-sidebar.tsx` +- Real-browser repro findings: + - ✅ `/chat/agents/researchAgent` no longer crashed on provider metadata access. + - ✅ the route no longer showed the false `Messages array must not be empty` error on first load. + - ✅ authenticated direct fetches to `http://localhost:4111/chat/researchAgent` returned a valid SSE stream start and tool-input chunks once cookies were included. + - ⚠️ the local Next.js dev server stopped before the final post-patch browser pass could be repeated, and this session cannot restart it because the runtime lacks `pwsh` or another usable shell tool. 
diff --git a/package-lock.json b/package-lock.json index caf07ff0..7756d706 100644 --- a/package-lock.json +++ b/package-lock.json @@ -215,6 +215,7 @@ "unpdf": "^1.6.0", "use-stick-to-bottom": "^1.1.3", "v0-sdk": "^0.16.4", + "zlib-sync": "^0.1.10", "zod": "^4.3.6" }, "devDependencies": { @@ -30763,36 +30764,6 @@ "webidl-conversions": "^3.0.0" } }, - "node_modules/gcp-metadata": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-7.0.1.tgz", - "integrity": "sha512-UcO3kefx6dCcZkgcTGgVOTFb7b1LlQ02hY1omMjjrrBzkajRMCFgYOjs7J71WqnuG1k2b+9ppGL7FsOfhZMQKQ==", - "extraneous": true, - "license": "Apache-2.0", - "dependencies": { - "gaxios": "^7.0.0", - "google-logging-utils": "^1.0.0", - "json-bigint": "^1.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/gcp-metadata/node_modules/gaxios": { - "version": "7.1.4", - "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-7.1.4.tgz", - "integrity": "sha512-bTIgTsM2bWn3XklZISBTQX7ZSddGW+IO3bMdGaemHZ3tbqExMENHLx6kKZ/KlejgrMtj8q7wBItt51yegqalrA==", - "extraneous": true, - "license": "Apache-2.0", - "dependencies": { - "extend": "^3.0.2", - "https-proxy-agent": "^7.0.1", - "node-fetch": "^3.3.2" - }, - "engines": { - "node": ">=18" - } - }, "node_modules/geckodriver": { "version": "6.1.0", "resolved": "https://registry.npmjs.org/geckodriver/-/geckodriver-6.1.0.tgz", @@ -44456,22 +44427,6 @@ "integrity": "sha512-KQQR9yN7R5+OSwaK0XQoj22pwHoTlgYqmUscPYoknOoWCWfj/5/ABTMRi69FrKU5ffPVh5QcFikpWJI/P1ocHA==", "license": "MIT" }, - "node_modules/tsup/node_modules/yaml": { - "version": "2.8.3", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.3.tgz", - "integrity": "sha512-AvbaCLOO2Otw/lW5bmh9d/WEdcDFdQp2Z2ZUH3pX9U2ihyUY0nvLv7J6TrWowklRGPYbB/IuIMfYgxaCPg5Bpg==", - "extraneous": true, - "license": "ISC", - "bin": { - "yaml": "bin.mjs" - }, - "engines": { - "node": ">= 14.6" - }, - "funding": { - "url": "https://github.com/sponsors/eemeli" - } - }, "node_modules/tsyringe": { 
"version": "4.10.0", "resolved": "https://registry.npmjs.org/tsyringe/-/tsyringe-4.10.0.tgz", @@ -45539,22 +45494,6 @@ } } }, - "node_modules/vitest/node_modules/yaml": { - "version": "2.8.3", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.3.tgz", - "integrity": "sha512-AvbaCLOO2Otw/lW5bmh9d/WEdcDFdQp2Z2ZUH3pX9U2ihyUY0nvLv7J6TrWowklRGPYbB/IuIMfYgxaCPg5Bpg==", - "extraneous": true, - "license": "ISC", - "bin": { - "yaml": "bin.mjs" - }, - "engines": { - "node": ">= 14.6" - }, - "funding": { - "url": "https://github.com/sponsors/eemeli" - } - }, "node_modules/vscode-jsonrpc": { "version": "8.2.1", "resolved": "https://registry.npmjs.org/vscode-jsonrpc/-/vscode-jsonrpc-8.2.1.tgz", @@ -46624,6 +46563,16 @@ "node": "^12.22.0 || ^14.17.0 || >=16.0.0" } }, + "node_modules/zlib-sync": { + "version": "0.1.10", + "resolved": "https://registry.npmjs.org/zlib-sync/-/zlib-sync-0.1.10.tgz", + "integrity": "sha512-t7/pYg5tLBznL1RuhmbAt8rNp5tbhr+TSrJFnMkRtrGIaPJZ6Dc0uR4u3OoQI2d6cGlVI62E3Gy6gwkxyIqr/w==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "nan": "^2.18.0" + } + }, "node_modules/zlibjs": { "version": "0.3.1", "resolved": "https://registry.npmjs.org/zlibjs/-/zlibjs-0.3.1.tgz", diff --git a/package.json b/package.json index c9edda31..aa5c7161 100644 --- a/package.json +++ b/package.json @@ -257,6 +257,7 @@ "unpdf": "^1.6.0", "use-stick-to-bottom": "^1.1.3", "v0-sdk": "^0.16.4", + "zlib-sync": "^0.1.10", "zod": "^4.3.6" }, "devDependencies": { diff --git a/page-2026-04-15T07-04-23-082Z.png b/page-2026-04-15T07-04-23-082Z.png new file mode 100644 index 00000000..95338ac8 Binary files /dev/null and b/page-2026-04-15T07-04-23-082Z.png differ diff --git a/src/mastra/agents/AGENTS.md b/src/mastra/agents/AGENTS.md index 6811b9c7..08f45702 100644 --- a/src/mastra/agents/AGENTS.md +++ b/src/mastra/agents/AGENTS.md @@ -1,4 +1,4 @@ - + # Agents (`/src/mastra/agents`) @@ -94,9 +94,14 @@ This directory contains 22+ agent definitions that map use-case 
intents to seque - **Tool Typing**: Avoid adding `ToolsInput` annotations to agent definitions unless they are truly required. Prefer inferred tool maps (`const tools = { ... }`) with `typeof tools`, or `Record` for tool-less agents. - **Supervisor Delegation**: For agents that define `agents: { ... }`, keep delegation hooks and completion scorers local to that agent so domain-specific routing prompts, failure feedback, and completion checks stay easy to audit. - **Completion Scoring**: Prefer more than one local scorer for supervisor-style agents when a task can be "complete" in different valid ways (for example, a comprehensive answer scorer plus an execution-readiness scorer). +- **Shared Scorer Primitives**: Reuse `createSupervisorPatternScorer(...)` from `src/mastra/scorers/supervisor-scorers.ts` for coordinator-style agents, but keep the final scorer instances and domain regexes local to each agent file. +- **Supervisor-Specific Shared Helpers**: For supervisor-style agent files, prefer `createSupervisorAgentPatternScorer(...)` so agent supervisors can share stronger user-facing defaults without forcing networks or other coordinators onto the exact same shared export. - **Child Agent Boundaries**: Do not mutate unrelated child-agent public generics just to satisfy one parent registration. If a supervisor relationship becomes awkward, prefer changing the parent composition instead of forcing a type-shape change into the child agent. - **Request Context**: Prefer dynamic `instructions: ({ requestContext }) => ...` plus `requestContextSchema` for user-facing supervisor agents so tier, language, user identity, and workspace hints can shape the final output contract safely. - **Supported Hook Surface**: For the current Mastra version in this repo, the main supervisor-style execution hooks are `onDelegationStart`, `onDelegationComplete`, `messageFilter`, `onIterationComplete`, and `isTaskComplete`. 
Prefer using these directly rather than inventing extra abstraction layers. +- **Browser Delegation**: When a supervisor has access to `browserAgent`, use it only for high-value live verification, page inspection, or browser-state confirmation. Do not browse by default when static research is sufficient. +- **Channel Wiring**: Gate optional channel adapters such as GitHub behind explicit environment checks so local/dev boots do not fail when webhook secrets or auth credentials are absent. +- **Browser Config**: Keep browser-provider lifecycle hooks and provider-specific defaults centralized in `src/mastra/browsers.ts` so agents share one production-grade browser policy surface. ## Execution & Testing @@ -126,6 +131,9 @@ npm test src/mastra/__tests__/agents/your-agent.test.ts | Version | Date (UTC) | Changes | | ------- | ---------- | ------------------------------------------------------------------------------------------------------------------------------------ | +| 2.2.10 | 2026-04-15 | Replaced `researchAgent`'s hard-coded free-model default with role-aware Gemini selection so the chat route uses the same production model pattern as the other research agents. | +| 2.2.9 | 2026-04-15 | Added the supervisor-specific shared scorer helper convention (`createSupervisorAgentPatternScorer(...)`), documented environment-gated channel adapters, and standardized centralized browser-provider hooks/config guidance. | +| 2.2.8 | 2026-04-15 | Standardized supervisor-style agents on shared `createSupervisorPatternScorer(...)` primitives with local wrappers and made `supervisor-agent` browser-aware by wiring in `browserAgent` with verification-first delegation guidance. | | 2.2.7 | 2026-03-28 | Made the active supervisor-style agents request-context-aware, added request-context schema validation, and expanded them to use the full supported execution-hook surface from the installed Mastra types. 
| | 2.2.6 | 2026-03-28 | Added dual completion scorers and explicit final-answer contracts to the active supervisor-style agents; restored `calendarAgent` shape and removed nested PM registration instead of changing the child agent public generic. | | 2.2.5 | 2026-03-27 | Added per-agent delegation hooks and local completion scorers for the active supervisor-style agents (`supervisor-agent`, customer support, project management, SEO, social media, translation) instead of introducing a shared helper layer. | diff --git a/src/mastra/agents/browserAgent.ts b/src/mastra/agents/browserAgent.ts index 7cafd1ec..9e1b6c41 100644 --- a/src/mastra/agents/browserAgent.ts +++ b/src/mastra/agents/browserAgent.ts @@ -8,15 +8,40 @@ export const browserAgent = new Agent({ id: 'browser-agent', name: 'Browser Agent', description: - 'Deterministic browser agent connected to a local Chrome instance through CDP.', - instructions: `You can browse the web using deterministic browser tools. + 'Deterministic browser verification agent connected to a Chrome session through CDP for reproducible live-page inspection.', + instructions: `You are a deterministic browser verification specialist. -Use browser_snapshot first to inspect the page structure, then interact with -elements by their refs (for example @e5). Prefer precise, repeatable actions. -When the task depends on the user’s local browser state, keep the interaction -focused on the connected Chrome session rather than opening a new browser. -`, +Mission: +- Verify live web claims, page behavior, and browser state with the fewest possible actions. +- Produce evidence the caller can trust: URLs, page titles, visible text, control states, and observed outcomes. +- Prefer reproducible browser tools over guesswork or narrative filler. + +Operating workflow: +1. Navigate with browser_goto only when you know the target URL or the caller explicitly asks you to open a page. +2. 
Start each page interaction with browser_snapshot so you can reason from stable refs like @e5. +3. Use browser_click, browser_type, browser_select, browser_press, browser_scroll, browser_hover, and browser_drag only after you have the correct ref from a fresh snapshot. +4. Use browser_wait after actions that trigger navigation, loading, or deferred UI updates. +5. Use browser_tabs deliberately when comparison or multi-page verification is needed. +6. Use browser_evaluate only as a last-resort escape hatch when deterministic tools cannot expose the required signal. +7. Use browser_dialog only when the page presents an alert, confirm, or prompt that must be handled intentionally. + +Evidence contract: +- Return what you verified, not what you assume. +- Distinguish clearly between verified facts, observed blockers, and unresolved uncertainty. +- Include the final URL and the most important visible evidence for each conclusion. +- If the task depends on the user's local browser state, stay focused on the connected session instead of opening unrelated pages. + +Safety rules: +- Do not perform destructive, account-changing, or purchase-like actions unless the user explicitly asks. +- Do not browse broadly when one or two targeted checks can answer the question. 
+- If the page is inaccessible or the evidence is weak, say so explicitly.`, model: 'google/gemini-3.1-flash-lite-preview', browser: agentBrowser, memory: LibsqlMemory, -}) \ No newline at end of file + defaultOptions: { + maxSteps: 12, + toolCallConcurrency: 1, + toolChoice: 'auto', + includeRawChunks: true, + }, +}) diff --git a/src/mastra/agents/businessLegalAgents.ts b/src/mastra/agents/businessLegalAgents.ts index 8b58814d..e6a23ef4 100644 --- a/src/mastra/agents/businessLegalAgents.ts +++ b/src/mastra/agents/businessLegalAgents.ts @@ -2,7 +2,7 @@ import type { GoogleGenerativeAIProviderOptions } from '@ai-sdk/google' import { Agent } from '@mastra/core/agent' import { - TokenLimiterProcessor, + //TokenLimiterProcessor, UnicodeNormalizer, } from '@mastra/core/processors' import type { RequestContext } from '@mastra/core/request-context' @@ -246,7 +246,7 @@ You are a Senior Contract Analyst. Analyze legal documents for risks, obligation collapseWhitespace: true, }), ], - outputProcessors: [new TokenLimiterProcessor(1048576)], + //outputProcessors: [new TokenLimiterProcessor(1048576)], defaultOptions: { autoResumeSuspendedTools: true, }, @@ -419,7 +419,7 @@ You are a Chief Strategy Officer with legal expertise. Align business strategy w collapseWhitespace: true, }), ], - outputProcessors: [new TokenLimiterProcessor(1048576)], + //outputProcessors: [new TokenLimiterProcessor(1048576)], //defaultOptions: { // autoResumeSuspendedTools: true, // }, diff --git a/src/mastra/agents/calendarAgent.ts b/src/mastra/agents/calendarAgent.ts index dd6b1ac3..70c5ce0e 100644 --- a/src/mastra/agents/calendarAgent.ts +++ b/src/mastra/agents/calendarAgent.ts @@ -56,7 +56,7 @@ Current user: ${userId ?? 
'anonymous'}`, }, } }, - model: "google/gemini-3.1-flash-lite-preview", + model: "google/gemma-4-31b-it", memory: LibsqlMemory, tools: { listEvents, diff --git a/src/mastra/agents/contentStrategistAgent.ts b/src/mastra/agents/contentStrategistAgent.ts index 40855ba8..b3e5d416 100644 --- a/src/mastra/agents/contentStrategistAgent.ts +++ b/src/mastra/agents/contentStrategistAgent.ts @@ -130,12 +130,12 @@ User: ${userId} | Role: ${role} | Style: ${strategy} thinkingBudget: -1, }, mediaResolution: 'MEDIA_RESOLUTION_MEDIUM', - responseModalities: ['TEXT'], + responseModalities: ['TEXT', 'IMAGE'], } satisfies GoogleGenerativeAIProviderOptions, }, } }, - model: 'google/gemini-3.1-flash-preview', + model: 'google/gemma-4-31b-it', memory: LibsqlMemory, tools: contentStrategistTools, options: { diff --git a/src/mastra/agents/customerSupportAgent.ts b/src/mastra/agents/customerSupportAgent.ts index 7f33f65b..b6d5fd22 100644 --- a/src/mastra/agents/customerSupportAgent.ts +++ b/src/mastra/agents/customerSupportAgent.ts @@ -1,15 +1,4 @@ import { Agent } from '@mastra/core/agent' -import { createScorer } from '@mastra/core/evals' -import { - extractAgentResponseMessages, - extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - getReasoningFromRunOutput, - getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' import { log } from '../config/logger' import { InternalSpans } from '@mastra/core/observability' @@ -22,6 +11,7 @@ import { getRoleFromContext, } from './request-context' import { LibsqlMemory } from '../config/libsql' +import { createSupervisorAgentPatternScorer } from '../scorers/supervisor-scorers' log.info('Initializing Customer Support Agent...') @@ -29,220 +19,88 @@ log.info('Initializing Customer Support Agent...') * Evaluates whether a customer-support response contains empathy, a practical resolution, * and clear follow-up guidance. 
*/ -const customerSupportTaskCompleteScorer = createScorer({ +const customerSupportTaskCompleteScorer = createSupervisorAgentPatternScorer({ id: 'customer-support-task-complete', name: 'Customer Support Task Completeness', description: 'Checks whether a support reply includes empathy, concrete next steps, and resolution guidance.', - type: 'agent', + label: 'customer support completeness', + emptyReason: 'No usable customer support response was produced.', + weakReason: 'The response is present but lacks the main support signals.', + strongReasonPrefix: 'This support response is strong because', + responseLengthThresholds: [ + { min: 140, weight: 0.15 }, + { min: 300, weight: 0.1 }, + ], + structureWeight: 0.05, + reasoningWeight: 0.03, + toolWeight: 0.02, + userMessageWeight: 0.05, + systemMessageWeight: 0.05, + signals: [ + { + label: 'it opens with empathy', + regex: /understand|sorry|happy to help|i can help|thanks for sharing/i, + weight: 0.2, + }, + { + label: 'it includes clear actions', + regex: /step 1|1\.|next step|please try|follow these steps|here's what to do/i, + weight: 0.2, + }, + { + label: 'it includes a resolution or escalation path', + regex: /if this does not work|if the issue persists|contact|follow up|escalate/i, + weight: 0.15, + }, + { + label: 'it adds verification guidance', + regex: /verify|confirm|check|test|expected result|what should happen/i, + weight: 0.05, + }, + ], }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - 
systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasEmpathy: - /understand|sorry|happy to help|i can help|thanks for sharing/i.test( - responseText - ), - hasActionableSteps: - /step 1|1\.|next step|please try|follow these steps|here's what to do/i.test( - responseText - ), - hasResolutionPath: - /if this does not work|if the issue persists|contact|follow up|escalate/i.test( - responseText - ), - hasVerification: - /verify|confirm|check|test|expected result|what should happen/i.test( - responseText - ), - hasStructure: - /^#{1,6}\s|^[-*]\s|^\d+\.\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) { - return 0 - } - - let score = 0 - if (analysis.hasUserMessage) score += 0.05 - if (analysis.systemMessageCount > 0) score += 0.05 - if (analysis.responseLength >= 140) score += 0.15 - if (analysis.responseLength >= 300) score += 0.1 - if (analysis.hasEmpathy) score += 0.2 - if (analysis.hasActionableSteps) score += 0.2 - if (analysis.hasResolutionPath) score += 0.15 - if (analysis.hasVerification) score += 0.05 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - if (analysis.hasStructure) score += 0.05 - - return Math.max(0, Math.min(1, 
score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - const parts: string[] = [] - - if (!analysis?.hasResponse) { - return 'No usable customer support response was produced.' - } - - if (analysis.hasEmpathy) parts.push('it opens with empathy') - if (analysis.hasActionableSteps) parts.push('it includes clear actions') - if (analysis.hasResolutionPath) parts.push('it includes a resolution or escalation path') - if (analysis.hasVerification) parts.push('it adds verification guidance') - if (analysis.hasReasoning) parts.push('it includes reasoning support') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This support response is strong because ${parts.join(', ')}.` : 'The response is present but lacks the main support signals.'}` - }) /** * Evaluates whether the support response is concise, operationally clear, and * ready to send to the customer. */ -const customerSupportResolutionScorer = createScorer({ +const customerSupportResolutionScorer = createSupervisorAgentPatternScorer({ id: 'customer-support-resolution-readiness', name: 'Customer Support Resolution Readiness', description: 'Checks whether a support reply provides a usable resolution flow, clear next steps, and escalation guidance.', - type: 'agent', + label: 'customer support resolution', + emptyReason: 'No usable customer support resolution was produced.', + weakReason: 'The response is present but not yet resolution-ready.', + strongReasonPrefix: 'This resolution is strong because', + responseLengthThresholds: [ + { min: 140, weight: 0.2 }, + { min: 260, weight: 0.1 }, + ], + structureWeight: 0.05, + reasoningWeight: 0.03, + toolWeight: 0.02, + userMessageWeight: 0.05, + systemMessageWeight: 0.05, + signals: [ + { + label: 'it gives a step-by-step flow', + regex: /1\.|2\.|step 1|step-by-step|follow these steps/i, + weight: 0.25, + }, + { + label: 'it tells the user how to verify the fix', + regex: /expected result|what should 
happen|confirm|verify/i, + weight: 0.2, + }, + { + label: 'it includes escalation guidance', + regex: /escalate|contact support|follow up|reply back|reach out/i, + weight: 0.15, + }, + ], }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? 
responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasSequence: - /1\.|2\.|step 1|step-by-step|follow these steps/i.test(responseText), - hasVerification: - /expected result|what should happen|confirm|verify/i.test(responseText), - hasEscalation: - /escalate|contact support|follow up|reply back|reach out/i.test( - responseText - ), - hasStructure: - /^#{1,6}\s|^[-*]\s|^\d+\.\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) { - return 0 - } - - let score = 0 - if (analysis.responseLength >= 140) score += 0.2 - if (analysis.responseLength >= 260) score += 0.1 - if (analysis.hasSequence) score += 0.25 - if (analysis.hasVerification) score += 0.2 - if (analysis.hasEscalation) score += 0.15 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - if (analysis.hasStructure) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - const parts: string[] = [] - - if (!analysis?.hasResponse) { - return 'No usable customer support resolution was produced.' - } - - if (analysis.hasSequence) parts.push('it gives a step-by-step flow') - if (analysis.hasVerification) parts.push('it tells the user how to verify the fix') - if (analysis.hasEscalation) parts.push('it includes escalation guidance') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This resolution is strong because ${parts.join(', ')}.` : 'The response is present but not yet resolution-ready.'}` - }) /** * Customer Support Agent. 
diff --git a/src/mastra/agents/dane.ts b/src/mastra/agents/dane.ts index e34d11d2..bddd5e39 100644 --- a/src/mastra/agents/dane.ts +++ b/src/mastra/agents/dane.ts @@ -38,12 +38,12 @@ export const daneCommitMessage = new Agent({ includeThoughts: true, thinkingBudget: -1, }, - responseModalities: ['TEXT'], + responseModalities: ['TEXT', 'IMAGE'], } satisfies GoogleGenerativeAIProviderOptions, }, } }, - model: "google/gemini-3.1-flash-lite-preview", + model: "google/gemma-4-31b-it", memory: LibsqlMemory, options: { tracingPolicy: { @@ -83,12 +83,12 @@ export const daneIssueLabeler = new Agent({ includeThoughts: true, thinkingBudget: -1, }, - responseModalities: ['TEXT'], + responseModalities: ['TEXT', 'IMAGE'], } satisfies GoogleGenerativeAIProviderOptions, }, } }, - model: "google/gemini-3.1-flash-lite-preview", + model: "google/gemma-4-31b-it", memory: LibsqlMemory, outputProcessors: [ // new TokenLimiterProcessor(128576), @@ -136,12 +136,12 @@ export const daneLinkChecker = new Agent({ includeThoughts: true, thinkingBudget: -1, }, - responseModalities: ['TEXT'], + responseModalities: ['TEXT', 'IMAGE'], } satisfies GoogleGenerativeAIProviderOptions, }, } }, - model: "google/gemini-3.1-flash-lite-preview", + model: "google/gemma-4-31b-it", memory: LibsqlMemory, options: { tracingPolicy: { @@ -188,12 +188,12 @@ export const daneChangeLog = new Agent({ includeThoughts: true, thinkingBudget: -1, }, - responseModalities: ['TEXT'], + responseModalities: ['TEXT', 'IMAGE'], } satisfies GoogleGenerativeAIProviderOptions, }, } }, - model: "google/gemini-3.1-flash-lite-preview", + model: "google/gemma-4-31b-it", memory: LibsqlMemory, defaultOptions: { autoResumeSuspendedTools: true, @@ -258,12 +258,12 @@ export const dane = new Agent({ includeThoughts: true, thinkingLevel: 'low', }, - responseModalities: ['TEXT'], + responseModalities: ['TEXT', 'IMAGE'], } satisfies GoogleGenerativeAIProviderOptions, }, } }, - model: 'google/gemini-3.1-flash-preview', + model: 
'google/gemma-4-31b-it', memory: LibsqlMemory, tools: { browserTool, diff --git a/src/mastra/agents/dataExportAgent.ts b/src/mastra/agents/dataExportAgent.ts index 60493c1e..23446318 100644 --- a/src/mastra/agents/dataExportAgent.ts +++ b/src/mastra/agents/dataExportAgent.ts @@ -3,7 +3,7 @@ import { Agent } from '@mastra/core/agent' import { log } from '../config/logger' import { InternalSpans } from '@mastra/core/observability' -import { TokenLimiterProcessor } from '@mastra/core/processors' +//import { TokenLimiterProcessor } from '@mastra/core/processors' import { USER_ID_CONTEXT_KEY, type AgentRequestContext } from './request-context' import { jsonToCsvTool } from '../tools/json-to-csv.tool' import { LibsqlMemory } from '../config/libsql' @@ -65,7 +65,7 @@ User: ${userId} | Out: ${outputDirectory} | Overwrite: ${overwriteExisting} internal: InternalSpans.ALL, }, }, - outputProcessors: [new TokenLimiterProcessor(1048576)], + //outputProcessors: [new TokenLimiterProcessor(1048576)], // defaultOptions: { // autoResumeSuspendedTools: true, // }, diff --git a/src/mastra/agents/documentProcessingAgent.ts b/src/mastra/agents/documentProcessingAgent.ts index 16826d3c..69b9058c 100644 --- a/src/mastra/agents/documentProcessingAgent.ts +++ b/src/mastra/agents/documentProcessingAgent.ts @@ -4,7 +4,7 @@ import { log } from '../config/logger' import { libsqlChunker, mastraChunker } from '../tools/document-chunking.tool' -import { TokenLimiterProcessor } from '@mastra/core/processors' +//import { TokenLimiterProcessor } from '@mastra/core/processors' import { InternalSpans } from '@mastra/core/observability' import type { AgentRequestContext } from './request-context' import { USER_ID_CONTEXT_KEY } from './request-context' @@ -81,7 +81,7 @@ User: ${userId} | In: ${inputDirectory} | Out: ${outputDirectory} internal: InternalSpans.ALL, }, }, - outputProcessors: [new TokenLimiterProcessor(1048576)], + //outputProcessors: [new TokenLimiterProcessor(1048576)], }) 
log.info('Document Processing Agent initialized') diff --git a/src/mastra/agents/editorAgent.ts b/src/mastra/agents/editorAgent.ts index b2388e7d..9c0661e5 100644 --- a/src/mastra/agents/editorAgent.ts +++ b/src/mastra/agents/editorAgent.ts @@ -59,7 +59,7 @@ Refine clarity, coherence, grammar, and style across Technical, Business, Creati includeThoughts: true, thinkingLevel: 'medium', }, - responseModalities: ['TEXT'], + responseModalities: ['TEXT', 'IMAGE'], } satisfies GoogleLanguageModelOptions, }, } @@ -74,7 +74,7 @@ Refine clarity, coherence, grammar, and style across Technical, Business, Creati return google.chat('gemini-3.1-pro-preview') } // cheaper/faster model for user tier - return google.chat('gemini-3.1-flash-lite-preview') + return "google/gemma-4-31b-it" }, memory: LibsqlMemory, tools: {}, diff --git a/src/mastra/agents/excalidraw_validator.ts b/src/mastra/agents/excalidraw_validator.ts index 35807819..38311f0f 100644 --- a/src/mastra/agents/excalidraw_validator.ts +++ b/src/mastra/agents/excalidraw_validator.ts @@ -1,7 +1,7 @@ import { Agent } from '@mastra/core/agent' import type { GoogleGenerativeAIProviderOptions } from '@ai-sdk/google' import type { RequestContext } from '@mastra/core/request-context' -import { TokenLimiterProcessor } from '@mastra/core/processors' +//import { TokenLimiterProcessor } from '@mastra/core/processors' import { InternalSpans } from '@mastra/core/observability' import type { AgentRequestContext } from './request-context' import { LibsqlMemory } from '../config/libsql' @@ -100,13 +100,13 @@ You can update the JSON to be valid and ensure it matches the expected excalidra includeThoughts: true, thinkingBudget: -1, }, - responseModalities: ['TEXT'], + responseModalities: ['TEXT', 'IMAGE'], mediaResolution: 'MEDIA_RESOLUTION_MEDIUM', } satisfies GoogleGenerativeAIProviderOptions, }, } }, - model: "google/gemini-3.1-flash-lite-preview", + model: "google/gemma-4-31b-it", memory: LibsqlMemory, tools: {}, scorers: {}, @@ 
-117,5 +117,5 @@ You can update the JSON to be valid and ensure it matches the expected excalidra }, }, maxRetries: 5, - outputProcessors: [new TokenLimiterProcessor(1048576)], + //outputProcessors: [new TokenLimiterProcessor(1048576)], }) diff --git a/src/mastra/agents/graphingAgents.ts b/src/mastra/agents/graphingAgents.ts index 7d3df465..ca44958b 100644 --- a/src/mastra/agents/graphingAgents.ts +++ b/src/mastra/agents/graphingAgents.ts @@ -1,7 +1,7 @@ import type { GoogleGenerativeAIProviderOptions } from '@ai-sdk/google' import { Agent } from '@mastra/core/agent' import { - TokenLimiterProcessor, + //TokenLimiterProcessor, UnicodeNormalizer, } from '@mastra/core/processors' import type { RequestContext } from '@mastra/core/request-context' @@ -141,7 +141,7 @@ Rules and best practices: }, inputProcessors: [ new UnicodeNormalizer({ - stripControlChars: false, + stripControlChars: true, collapseWhitespace: true, preserveEmojis: true, trim: true, @@ -274,7 +274,7 @@ export const fetchAgent = new Agent({ internal: InternalSpans.ALL, }, }, - outputProcessors: [new TokenLimiterProcessor(32768)], + //outputProcessors: [new TokenLimiterProcessor(32768)], }) /** diff --git a/src/mastra/agents/image.ts b/src/mastra/agents/image.ts index bf8f9b49..3cece2a4 100644 --- a/src/mastra/agents/image.ts +++ b/src/mastra/agents/image.ts @@ -56,7 +56,7 @@ export const imageAgent = new Agent({ }, } }, - model: googleAINanoBanana, + model: 'google/gemma-4-31b-it', memory: LibsqlMemory, options: { tracingPolicy: { diff --git a/src/mastra/agents/image_to_csv.ts b/src/mastra/agents/image_to_csv.ts index ebde344b..fec9c193 100644 --- a/src/mastra/agents/image_to_csv.ts +++ b/src/mastra/agents/image_to_csv.ts @@ -3,7 +3,7 @@ import { googleAI } from '../config' import type { GoogleGenerativeAIProviderOptions } from '@ai-sdk/google' import type { RequestContext } from '@mastra/core/request-context' -import { TokenLimiterProcessor } from '@mastra/core/processors' +//import { 
TokenLimiterProcessor } from '@mastra/core/processors' import { InternalSpans } from '@mastra/core/observability' import type { AgentRequestContext } from './request-context' import { LibsqlMemory } from '../config/libsql' @@ -157,5 +157,5 @@ IMPORTANT: Only return the CSV string including the header row. Do not include a }, workflows: {}, maxRetries: 5, - outputProcessors: [new TokenLimiterProcessor(1048576)], + // outputProcessors: [new TokenLimiterProcessor(1048576)], }) diff --git a/src/mastra/agents/index.ts b/src/mastra/agents/index.ts index 1ead0928..4e52ba05 100644 --- a/src/mastra/agents/index.ts +++ b/src/mastra/agents/index.ts @@ -22,6 +22,7 @@ export type { CalendarContext } from './calendarAgent' export type { ScriptWriterRuntimeContext } from './scriptWriterAgent' export { acpAgent } from './acpAgent' +export { browserAgent } from './browserAgent' export { legalResearchAgent, contractAnalysisAgent, diff --git a/src/mastra/agents/learningExtractionAgent.ts b/src/mastra/agents/learningExtractionAgent.ts index ec1705fd..db4cd532 100644 --- a/src/mastra/agents/learningExtractionAgent.ts +++ b/src/mastra/agents/learningExtractionAgent.ts @@ -2,7 +2,7 @@ import { Agent } from '@mastra/core/agent' import { log } from '../config/logger' import type { GoogleGenerativeAIProviderOptions } from '@ai-sdk/google' -import { TokenLimiterProcessor } from '@mastra/core/processors' +//import { TokenLimiterProcessor } from '@mastra/core/processors' import { InternalSpans } from '@mastra/core/observability' import { getRoleFromContext, @@ -73,5 +73,5 @@ Extract the single most important learning and create one relevant follow-up que }, workflows: {}, maxRetries: 5, - outputProcessors: [new TokenLimiterProcessor(128000)], + // outputProcessors: [new TokenLimiterProcessor(128000)], }) diff --git a/src/mastra/agents/noteTakerAgent.ts b/src/mastra/agents/noteTakerAgent.ts index 9e96a7bb..c4f020c8 100644 --- a/src/mastra/agents/noteTakerAgent.ts +++ 
b/src/mastra/agents/noteTakerAgent.ts @@ -13,7 +13,7 @@ export const noteTakerAgent = new Agent({ instructions: instructions1, memory: LibsqlMemory, // tools: [], - model: 'google/gemini-3.1-flash-lite-preview', + model: 'google/gemma-4-31b-it', options: { tracingPolicy: { internal: InternalSpans.ALL, diff --git a/src/mastra/agents/package-publisher.ts b/src/mastra/agents/package-publisher.ts index 74372d69..5926bea1 100644 --- a/src/mastra/agents/package-publisher.ts +++ b/src/mastra/agents/package-publisher.ts @@ -1,7 +1,7 @@ import { Agent } from '@mastra/core/agent' import type { GoogleGenerativeAIProviderOptions } from '@ai-sdk/google' -import { TokenLimiterProcessor } from '@mastra/core/processors' +//import { TokenLimiterProcessor } from '@mastra/core/processors' import { getLanguageFromContext, getRoleFromContext, @@ -168,5 +168,5 @@ export const danePackagePublisher = new Agent({ }, }, scorers: {}, - outputProcessors: [new TokenLimiterProcessor(128000)], + //outputProcessors: [new TokenLimiterProcessor(128000)], }) diff --git a/src/mastra/agents/projectManagementAgent.ts b/src/mastra/agents/projectManagementAgent.ts index cc99d06b..8af43250 100644 --- a/src/mastra/agents/projectManagementAgent.ts +++ b/src/mastra/agents/projectManagementAgent.ts @@ -1,15 +1,4 @@ import { Agent } from '@mastra/core/agent' -import { createScorer } from '@mastra/core/evals' -import { - extractAgentResponseMessages, - extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - getReasoningFromRunOutput, - getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' import { log } from '../config/logger' import { reportAgent } from './reportAgent' @@ -22,6 +11,7 @@ import { getRoleFromContext, } from './request-context' import { LibsqlMemory } from '../config/libsql' +import { createSupervisorAgentPatternScorer } from '../scorers/supervisor-scorers' log.info('Initializing Project Management 
Agent...') @@ -29,207 +19,94 @@ log.info('Initializing Project Management Agent...') * Evaluates whether a project-management response contains a practical plan, * scheduling awareness, and visible risk management. */ -const projectManagementTaskCompleteScorer = createScorer({ +const projectManagementTaskCompleteScorer = createSupervisorAgentPatternScorer({ id: 'project-management-task-complete', name: 'Project Management Task Completeness', description: 'Checks whether a project response includes timeline, deliverables, risks, and next actions.', - type: 'agent', + label: 'project management completeness', + emptyReason: 'No usable project management response was produced.', + weakReason: 'The response is present but lacks planning depth.', + strongReasonPrefix: 'This project response is strong because', + responseLengthThresholds: [ + { min: 180, weight: 0.15 }, + { min: 350, weight: 0.15 }, + ], + minParagraphsForStructure: 3, + structureWeight: 0.05, + reasoningWeight: 0.03, + toolWeight: 0.02, + userMessageWeight: 0.05, + systemMessageWeight: 0.05, + signals: [ + { + label: 'it includes timeline or milestone guidance', + regex: /timeline|schedule|deadline|milestone/i, + weight: 0.2, + }, + { + label: 'it calls out risks or blockers', + regex: /risk|blocker|dependency|constraint/i, + weight: 0.2, + }, + { + label: 'it includes ownership or stakeholder guidance', + regex: /owner|stakeholder|resource|team/i, + weight: 0.15, + }, + { + label: 'it ends with concrete next actions', + regex: /next step|action item|deliverable/i, + weight: 0.15, + }, + ], }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const 
reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasTimeline: /timeline|schedule|deadline|milestone/i.test(responseText), - hasRisk: /risk|blocker|dependency|constraint/i.test(responseText), - hasOwnership: /owner|stakeholder|resource|team/i.test(responseText), - hasActions: /next step|action item|deliverable/i.test(responseText), - hasStructure: - /^#{1,6}\s|^[-*]\s|^\d+\.\s/m.test(responseText) || - responseText.split(/\n\s*\n/).filter(Boolean).length >= 3, - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) { - return 0 - } - - let score = 0 - if (analysis.responseLength >= 180) score += 0.15 - if (analysis.responseLength >= 350) score += 0.15 - if (analysis.hasTimeline) score += 0.2 - if (analysis.hasRisk) score += 0.2 - if (analysis.hasOwnership) score += 0.15 - if (analysis.hasActions) score += 0.15 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - if (analysis.hasStructure) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const 
analysis = results.analyzeStepResult - const parts: string[] = [] - - if (!analysis?.hasResponse) { - return 'No usable project management response was produced.' - } - - if (analysis.hasTimeline) parts.push('it includes timeline or milestone guidance') - if (analysis.hasRisk) parts.push('it calls out risks or blockers') - if (analysis.hasOwnership) parts.push('it includes ownership or stakeholder guidance') - if (analysis.hasActions) parts.push('it ends with concrete next actions') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This project response is strong because ${parts.join(', ')}.` : 'The response is present but lacks planning depth.'}` - }) /** * Evaluates whether the project-management response is execution-ready with a * clear plan shape, sequencing, and ownership or decision guidance. */ -const projectManagementExecutionScorer = createScorer({ +const projectManagementExecutionScorer = createSupervisorAgentPatternScorer({ id: 'project-management-execution-readiness', name: 'Project Management Execution Readiness', description: 'Checks whether a PM response includes priorities, sequencing, accountability, and decision-ready next actions.', - type: 'agent', + label: 'project management execution', + emptyReason: 'No usable project management execution plan was produced.', + weakReason: 'The response is present but still lacks execution detail.', + strongReasonPrefix: 'This execution plan is strong because', + responseLengthThresholds: [ + { min: 160, weight: 0.15 }, + { min: 280, weight: 0.1 }, + ], + structureWeight: 0.05, + reasoningWeight: 0.03, + toolWeight: 0.02, + userMessageWeight: 0.05, + systemMessageWeight: 0.05, + signals: [ + { + label: 'it clarifies priority or sequencing', + regex: /priority|p0|p1|phase|sequence|order/i, + weight: 0.2, + }, + { + label: 'it names owners or accountable parties', + regex: /owner|stakeholder|team|responsible/i, + weight: 0.2, + }, + { + label: 'it calls out decisions or trade-offs', + regex: 
/decision|assumption|trade-off|escalation/i, + weight: 0.2, + }, + { + label: 'it includes immediate next actions', + regex: /next step|immediate action|this week|milestone/i, + weight: 0.1, + }, + ], }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? 
responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasPriority: /priority|p0|p1|phase|sequence|order/i.test(responseText), - hasAccountability: - /owner|stakeholder|team|responsible/i.test(responseText), - hasDecisionSupport: - /decision|assumption|trade-off|escalation/i.test(responseText), - hasUrgency: - /next step|immediate action|this week|milestone/i.test(responseText), - hasStructure: - /^#{1,6}\s|^[-*]\s|^\d+\.\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) { - return 0 - } - - let score = 0 - if (analysis.responseLength >= 160) score += 0.15 - if (analysis.responseLength >= 280) score += 0.1 - if (analysis.hasPriority) score += 0.2 - if (analysis.hasAccountability) score += 0.2 - if (analysis.hasDecisionSupport) score += 0.2 - if (analysis.hasUrgency) score += 0.1 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - if (analysis.hasStructure) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - const parts: string[] = [] - - if (!analysis?.hasResponse) { - return 'No usable project management execution plan was produced.' 
- } - - if (analysis.hasPriority) parts.push('it clarifies priority or sequencing') - if (analysis.hasAccountability) parts.push('it names owners or accountable parties') - if (analysis.hasDecisionSupport) parts.push('it calls out decisions or trade-offs') - if (analysis.hasUrgency) parts.push('it includes immediate next actions') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This execution plan is strong because ${parts.join(', ')}.` : 'The response is present but still lacks execution detail.'}` - }) export const projectManagementAgent = new Agent< 'project-management-agent', diff --git a/src/mastra/agents/recharts.ts b/src/mastra/agents/recharts.ts index 5dce6cc8..bf41f9d4 100644 --- a/src/mastra/agents/recharts.ts +++ b/src/mastra/agents/recharts.ts @@ -2,10 +2,9 @@ import type { GoogleGenerativeAIProviderOptions } from '@ai-sdk/google' import { Agent } from '@mastra/core/agent' import { InternalSpans } from '@mastra/core/observability' import { - TokenLimiterProcessor, + //TokenLimiterProcessor, UnicodeNormalizer } from '@mastra/core/processors' -import { googleAI, googleAIFlashLite } from '../config' import { log } from '../config/logger' import { alphaVantageStockTool } from '../tools/alpha-vantage.tool' import { @@ -140,7 +139,7 @@ You are a Financial Data Visualization Specialist focused on recommending optima }, } }, - model: googleAIFlashLite, + model: 'google/gemma-4-31b-it', memory: LibsqlMemory, tools: {}, maxRetries: 3, @@ -149,7 +148,6 @@ You are a Financial Data Visualization Specialist focused on recommending optima internal: InternalSpans.ALL, }, }, - outputProcessors: [new TokenLimiterProcessor(1048576)], }) /** @@ -199,7 +197,7 @@ You are a Financial Data Processing Specialist that transforms raw API data into }, } }, - model: googleAIFlashLite, + model: 'google/gemma-4-31b-it', tools: chartDataProcessorTools, memory: LibsqlMemory, options: { @@ -208,7 +206,7 @@ You are a Financial Data Processing Specialist that transforms raw 
API data into }, }, maxRetries: 3, - outputProcessors: [new TokenLimiterProcessor(1048576)], + //outputProcessors: [new TokenLimiterProcessor(1048576)], }) /** @@ -259,7 +257,7 @@ You are a Senior React Developer specializing in Recharts financial visualizatio }, } }, - model: googleAI, + model: 'google/gemma-4-31b-it', memory: LibsqlMemory, tools: {}, maxRetries: 3, @@ -324,7 +322,7 @@ You are the Financial Chart Supervisor, orchestrating the complete chart creatio }, } }, - model: googleAI, + model: 'google/gemma-4-31b-it', tools: chartSupervisorTools, memory: LibsqlMemory, options: { diff --git a/src/mastra/agents/reportAgent.ts b/src/mastra/agents/reportAgent.ts index 039008fe..833f2ba3 100644 --- a/src/mastra/agents/reportAgent.ts +++ b/src/mastra/agents/reportAgent.ts @@ -3,9 +3,6 @@ import { log } from '../config/logger' import type { GoogleLanguageModelOptions } from '@ai-sdk/google' import { InternalSpans } from '@mastra/core/observability' -import { - TokenLimiterProcessor -} from '@mastra/core/processors' import { getLanguageFromContext, getRoleFromContext, @@ -66,7 +63,7 @@ export const reportAgent = new Agent({ return "google/gemini-3.1-flash-lite-preview" } // cheaper/faster model for free tier - return "google/gemini-3.1-flash-lite-preview" + return "google/gemma-4-31b-it" }, memory: LibsqlMemory, tools: {}, @@ -78,14 +75,6 @@ export const reportAgent = new Agent({ scorers: {}, workflows: {}, maxRetries: 5, - outputProcessors: [ - new TokenLimiterProcessor(1048576), - // new BatchPartsProcessor({ - // batchSize: 5, - // maxWaitTime: 75, - // emitOnNonText: true, - // }), - ], }) // --- IGNORE --- diff --git a/src/mastra/agents/researchAgent.ts b/src/mastra/agents/researchAgent.ts index 12dc1470..246c31c1 100644 --- a/src/mastra/agents/researchAgent.ts +++ b/src/mastra/agents/researchAgent.ts @@ -1,6 +1,7 @@ import { libsqlQueryTool, libsqlgraphQueryTool } from './../config/libsql'; import { libsqlChunker, mdocumentChunker } from 
'./../tools/document-chunking.tool'; import type { GoogleGenerativeAIProviderOptions } from '@ai-sdk/google' +import type { Message, Thread } from 'chat' import { Agent } from '@mastra/core/agent' import { log } from '../config/logger' import { evaluateResultTool } from '../tools/evaluateResultTool' @@ -24,8 +25,8 @@ import { yahooFinanceStockQuotesTool } from '../tools/yahoo-finance-stock.tool' // Scorers import { InternalSpans } from '@mastra/core/observability' -import { AgentChannels } from '@mastra/core/channels' -import { mainWorkspace } from '../workspaces' +import type { ChannelHandlers } from '@mastra/core/channels' +import * as workspaces from '../workspaces' import { getLanguageFromContext, getRoleFromContext, @@ -35,16 +36,180 @@ import { researchArxivDownloadWorkflow } from '../workflows/research/research-ar import { researchArxivSearchWorkflow } from '../workflows/research/research-arxiv-search.workflow' import { LibsqlMemory } from '../config/libsql' import { listRepositories } from '../tools/github'; -import { agentBrowser } from '../browsers'; -//import { createGitHubAdapter } from "@chat-adapter/github"; +import * as browsers from '../browsers'; +import { createGitHubAdapter } from '@chat-adapter/github' import { createDiscordAdapter } from '@chat-adapter/discord' - +import { google } from '../config/google' +import { + ToolSearchProcessor, + //TokenLimiter +} from '@mastra/core/processors' //const github = createGitHubAdapter({ // //appId: process.env.GITHUB_APP_ID!, // //privateKey: process.env.GITHUB_PRIVATE_KEY!, // // webhookSecret: process.env.GITHUB_WEBHOOK_SECRET!, //}); +/** + * Enables the GitHub channel only when the webhook secret and at least one + * supported authentication path are configured in the environment. 
+ */ +function isGitHubChannelConfigured(): boolean { + const hasWebhookSecret = Boolean(process.env.GITHUB_WEBHOOK_SECRET?.trim()) + const hasToken = Boolean(process.env.GITHUB_TOKEN?.trim()) + const hasAppAuth = Boolean( + process.env.GITHUB_APP_ID?.trim() && + process.env.GITHUB_PRIVATE_KEY?.trim() + ) + + return hasWebhookSecret && (hasToken || hasAppAuth) +} + +const researchAgentChannelAdapters = { + discord: { + adapter: createDiscordAdapter(), + gateway: false, + }, + ...(isGitHubChannelConfigured() + ? { + github: { + adapter: createGitHubAdapter({ + userName: + process.env.GITHUB_BOT_USERNAME?.trim() ?? 'research-agent', + }), + gateway: false, + cards: false, + }, + } + : {}), +} + +/** + * Normalizes the message text available from channel adapters so handler logic + * can make lightweight decisions without depending on one platform shape. + */ +function getChannelMessageText(message: { + text?: string + content?: unknown +}): string { + if (typeof message.text === 'string' && message.text.trim().length > 0) { + return message.text.trim() + } + + if (typeof message.content === 'string' && message.content.trim().length > 0) { + return message.content.trim() + } + + return '' +} + +/** + * Detects low-signal follow-up messages that do not warrant another full + * research pass when the agent is already subscribed to the thread. + */ +function isAcknowledgementOnlyMessage(messageText: string): boolean { + return /^(thanks|thank you|resolved|done|fixed|closed|lgtm|sgtm|looks good)[.!]?$/i.test( + messageText.trim() + ) +} + +/** + * Detects GitHub-backed channel threads from the Chat SDK thread ID format. + */ +function isGitHubThread(thread: Thread): boolean { + return thread.id.startsWith('github:') +} + +type ResearchChannelEvent = + | 'direct-message' + | 'mention' + | 'subscribed-message' + +/** + * Centralizes research-channel hook behavior so every handler logs the same + * metadata and applies the same low-signal suppression rules. 
+ */ +async function handleResearchChannelEvent( + event: ResearchChannelEvent, + thread: Thread, + message: Message, + defaultHandler: (thread: Thread, message: Message) => Promise, + options?: { + skipAcknowledgements?: boolean + } +): Promise { + const messageText = getChannelMessageText(message) + const acknowledgementOnly = isAcknowledgementOnlyMessage(messageText) + const githubThread = isGitHubThread(thread) + + log.info('Research channel event', { + event, + threadId: thread.id, + platform: githubThread ? 'github' : 'chat', + textLength: messageText.length, + acknowledgementOnly, + }) + + if (options?.skipAcknowledgements && acknowledgementOnly) { + log.info('Research channel event skipped', { + event, + threadId: thread.id, + reason: 'acknowledgement-only', + platform: githubThread ? 'github' : 'chat', + }) + return + } + + await defaultHandler(thread, message) +} + +const researchChannelHandlers: ChannelHandlers = { + onDirectMessage: async (thread, message, defaultHandler) => { + await handleResearchChannelEvent( + 'direct-message', + thread, + message, + defaultHandler + ) + }, + onMention: async (thread, message, defaultHandler) => { + await handleResearchChannelEvent( + 'mention', + thread, + message, + defaultHandler, + { + skipAcknowledgements: true, + } + ) + }, + onSubscribedMessage: async (thread, message, defaultHandler) => { + await handleResearchChannelEvent( + 'subscribed-message', + thread, + message, + defaultHandler, + { + skipAcknowledgements: true, + } + ) + }, +} + +/** + * Returns the shared workspace used by the research agent. + */ +function getResearchAgentWorkspace() { + return workspaces.mainWorkspace +} + +/** + * Returns the deterministic browser configured for research verification. 
+ */ +function getResearchAgentBrowser() { + return browsers.agentBrowser +} + type ResearchPhase = 'initial' | 'followup' | 'validation' const RESEARCH_PHASE_CONTEXT_KEY = 'researchPhase' as const @@ -117,7 +282,8 @@ Role: ${role} | Lang: ${language} | Phase: ${researchPhase} ## Tool Selection Guide - **Web**: Prefer 'fetchTool' for reliable URL fetch/search to markdown. -- **News/Trends**: 'googleNewsTool', 'googleTrendsTool', 'googleFinanceTool'. +- **Live browser verification**: Use the attached browser only when page state, interaction results, or live UI evidence materially matters more than static fetch output. +- **News/Trends**: 'googleNewsLiteTool', 'googleTrendsTool', 'googleFinanceTool'. - **Academic**: 'googleScholarTool'. - **Financial**: Use 'polygon*' for stocks/crypto. - **Financial**: Use 'polygon*' for stocks/crypto when you need paid/commercial feeds; use 'binanceSpotMarketDataTool' for free crypto spot data and batch lookups of 1-10 symbols; use 'coinbaseExchangeMarketDataTool', 'stooqStockQuotesTool', and 'yahooFinanceStockQuotesTool' for free public market data. @@ -129,6 +295,7 @@ Role: ${role} | Lang: ${language} | Phase: ${researchPhase} - **Efficiency**: No repetitive or back-to-back tool calls for the same query. - **Specificity**: Use focused queries; cite sources with confidence levels. - **Fallback**: If tools fail, use internal knowledge and state failure. +- **GitHub channel delivery**: If the request arrives from a GitHub issue or PR comment thread, respond in concise GitHub-flavored Markdown with a direct answer, bullet findings, source links, and the clearest next action or blocker. 
`, providerOptions: { google: { @@ -141,11 +308,14 @@ Role: ${role} | Lang: ${language} | Phase: ${researchPhase} }, } }, - model: { - url: "https://api.kilo.ai/api/gateway", - id:'kilo/x-ai/grok-code-fast-1:optimized:free', - apiKey: process.env.KILO_API_KEY, - provider: 'kilo', + model: ({ requestContext }) => { + const role = getRoleFromContext(requestContext) + + if (role === 'admin') { + return google.chat('gemini-3.1-pro-preview') + } + + return google.chat('gemini-3.1-flash-lite-preview') }, tools: researchAgentTools, workflows: { researchArxivDownloadWorkflow, researchArxivSearchWorkflow }, @@ -162,6 +332,13 @@ Role: ${role} | Lang: ${language} | Phase: ${researchPhase} }, }, //voice: gvoice, + inputProcessors: [ + new ToolSearchProcessor({ + tools: researchAgentTools, + search: { topK: 5 }, + }), + //new TokenLimiter(2048), + ], outputProcessors: [ // new TokenLimiterProcessor(128000), // new BatchPartsProcessor({ @@ -170,16 +347,17 @@ Role: ${role} | Lang: ${language} | Phase: ${researchPhase} // emitOnNonText: true, // }), ], - workspace: mainWorkspace, - browser: agentBrowser, - channels: new AgentChannels({ - adapters: { - discord: { - adapter: createDiscordAdapter(), - gateway: false, - }, + workspace: getResearchAgentWorkspace(), + browser: getResearchAgentBrowser(), + channels: { + inlineLinks: ['*'], + inlineMedia: ['image/*', 'video/*', 'audio/*'], + adapters: researchAgentChannelAdapters, + threadContext: { + maxMessages: 15, }, - }), + handlers: researchChannelHandlers, + }, // defaultOptions: { // autoResumeSuspendedTools: true, // }, diff --git a/src/mastra/agents/researchPaperAgent.ts b/src/mastra/agents/researchPaperAgent.ts index 8327e3d2..9478ca82 100644 --- a/src/mastra/agents/researchPaperAgent.ts +++ b/src/mastra/agents/researchPaperAgent.ts @@ -2,9 +2,6 @@ import { Agent } from '@mastra/core/agent' import { log } from '../config/logger' import { InternalSpans } from '@mastra/core/observability' -import { - TokenLimiterProcessor -} from 
'@mastra/core/processors' import { arxivPaperDownloaderTool, arxivPdfParserTool, @@ -78,14 +75,14 @@ User: ${role} | Lang: ${language} internal: InternalSpans.ALL, }, }, - outputProcessors: [ - new TokenLimiterProcessor(128000), - // new BatchPartsProcessor({ - // batchSize: 5, - // maxWaitTime: 75, - // emitOnNonText: true, + // outputProcessors: [ + // new TokenLimiterProcessor(128000), + // // new BatchPartsProcessor({ + // // batchSize: 5, + // // maxWaitTime: 75, + // // emitOnNonText: true, // }), - ], + //], // defaultOptions: { // autoResumeSuspendedTools: true, // }, diff --git a/src/mastra/agents/scriptWriterAgent.ts b/src/mastra/agents/scriptWriterAgent.ts index 1e9140b6..f20fca19 100644 --- a/src/mastra/agents/scriptWriterAgent.ts +++ b/src/mastra/agents/scriptWriterAgent.ts @@ -2,7 +2,7 @@ import type { GoogleGenerativeAIProviderOptions } from '@ai-sdk/google' import { google } from '@ai-sdk/google' import { Agent } from '@mastra/core/agent' import { InternalSpans } from '@mastra/core/observability' -import { TokenLimiterProcessor } from '@mastra/core/processors' +//import { TokenLimiterProcessor } from '@mastra/core/processors' import { fetchTool } from '../tools/fetch.tool' import { getLanguageFromContext, @@ -72,7 +72,7 @@ User: ${userTier} | Lang: ${language} }, scorers: {}, tools: scriptWriterTools, - outputProcessors: [new TokenLimiterProcessor(1048576)], + // outputProcessors: [new TokenLimiterProcessor(1048576)], // defaultOptions: { // autoResumeSuspendedTools: true, // }, diff --git a/src/mastra/agents/seoAgent.ts b/src/mastra/agents/seoAgent.ts index 94d4b0fa..930115db 100644 --- a/src/mastra/agents/seoAgent.ts +++ b/src/mastra/agents/seoAgent.ts @@ -1,15 +1,4 @@ import { Agent } from '@mastra/core/agent' -import { createScorer } from '@mastra/core/evals' -import { - extractAgentResponseMessages, - extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - getReasoningFromRunOutput, - 
getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' import { log } from '../config/logger' import { InternalSpans } from '@mastra/core/observability' @@ -22,6 +11,7 @@ import { getRoleFromContext, } from './request-context' import { LibsqlMemory } from '../config/libsql' +import { createSupervisorAgentPatternScorer } from '../scorers/supervisor-scorers' log.info('Initializing SEO Agent...') @@ -29,207 +19,89 @@ log.info('Initializing SEO Agent...') * Evaluates whether an SEO response covers research-backed optimization guidance, * prioritization, and concrete implementation detail. */ -const seoTaskCompleteScorer = createScorer({ +const seoTaskCompleteScorer = createSupervisorAgentPatternScorer({ id: 'seo-task-complete', name: 'SEO Task Completeness', description: 'Checks whether an SEO response includes keyword or SERP insight, actionable optimization guidance, and prioritization.', - type: 'agent', + label: 'SEO completeness', + emptyReason: 'No usable SEO response was produced.', + weakReason: 'The response is present but still lacks optimization depth.', + strongReasonPrefix: 'This SEO response is strong because', + responseLengthThresholds: [ + { min: 160, weight: 0.15 }, + { min: 350, weight: 0.15 }, + ], + minParagraphsForStructure: 3, + structureWeight: 0.05, + reasoningWeight: 0.03, + toolWeight: 0.02, + userMessageWeight: 0.05, + systemMessageWeight: 0.05, + signals: [ + { + label: 'it includes keyword or SERP insight', + regex: /keyword|serp|search intent|competitor/i, + weight: 0.2, + }, + { + label: 'it covers on-page optimization', + regex: /title|meta|header|internal link|content/i, + weight: 0.15, + }, + { + label: 'it addresses technical SEO', + regex: /technical|core web vitals|schema|crawl|index/i, + weight: 0.15, + }, + { + label: 'it prioritizes the recommendations', + regex: /priority|impact|effort|next step/i, + weight: 0.15, + }, + ], }) - .preprocess(({ run }) => { - const userMessage = 
getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasKeyword: - /keyword|serp|search intent|competitor/i.test(responseText), - hasOnPage: - /title|meta|header|internal link|content/i.test(responseText), - hasTechnical: - /technical|core web vitals|schema|crawl|index/i.test(responseText), - hasPriority: - /priority|impact|effort|next step/i.test(responseText), - hasStructure: - /^#{1,6}\s|^[-*]\s|^\d+\.\s/m.test(responseText) || - responseText.split(/\n\s*\n/).filter(Boolean).length >= 3, - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) { - return 0 - } - - let score = 0 - if (analysis.responseLength >= 160) score += 0.15 - if (analysis.responseLength >= 350) score += 0.15 - if 
(analysis.hasKeyword) score += 0.2 - if (analysis.hasOnPage) score += 0.15 - if (analysis.hasTechnical) score += 0.15 - if (analysis.hasPriority) score += 0.15 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - if (analysis.hasStructure) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - const parts: string[] = [] - - if (!analysis?.hasResponse) { - return 'No usable SEO response was produced.' - } - - if (analysis.hasKeyword) parts.push('it includes keyword or SERP insight') - if (analysis.hasOnPage) parts.push('it covers on-page optimization') - if (analysis.hasTechnical) parts.push('it addresses technical SEO') - if (analysis.hasPriority) parts.push('it prioritizes the recommendations') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This SEO response is strong because ${parts.join(', ')}.` : 'The response is present but still lacks optimization depth.'}` - }) /** * Evaluates whether the SEO response is execution-ready, prioritized, and tied * to measurable search performance outcomes. 
*/ -const seoActionabilityScorer = createScorer({ +const seoActionabilityScorer = createSupervisorAgentPatternScorer({ id: 'seo-actionability-readiness', name: 'SEO Actionability Readiness', description: 'Checks whether an SEO response is prioritized, measurable, and easy to implement.', - type: 'agent', + label: 'SEO actionability', + emptyReason: 'No usable SEO actionability response was produced.', + weakReason: 'The response is present but still lacks implementable detail.', + strongReasonPrefix: 'This SEO plan is strong because', + responseLengthThresholds: [ + { min: 140, weight: 0.15 }, + { min: 260, weight: 0.1 }, + ], + structureWeight: 0.05, + reasoningWeight: 0.03, + toolWeight: 0.02, + userMessageWeight: 0.05, + systemMessageWeight: 0.05, + signals: [ + { + label: 'it prioritizes by impact or effort', + regex: /priority|high impact|quick win|effort|impact/i, + weight: 0.25, + }, + { + label: 'it ties changes to metrics', + regex: /ranking|ctr|traffic|conversion|metric|measure/i, + weight: 0.2, + }, + { + label: 'it gives actionable implementation steps', + regex: /next step|implement|update|add|fix/i, + weight: 0.2, + }, + ], }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - 
tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasImpact: /priority|high impact|quick win|effort|impact/i.test(responseText), - hasMetrics: - /ranking|ctr|traffic|conversion|metric|measure/i.test(responseText), - hasAction: - /next step|implement|update|add|fix/i.test(responseText), - hasStructure: - /^#{1,6}\s|^[-*]\s|^\d+\.\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) { - return 0 - } - - let score = 0 - if (analysis.responseLength >= 140) score += 0.15 - if (analysis.responseLength >= 260) score += 0.1 - if (analysis.hasImpact) score += 0.25 - if (analysis.hasMetrics) score += 0.2 - if (analysis.hasAction) score += 0.2 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - if (analysis.hasStructure) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - const parts: string[] = [] - - if (!analysis?.hasResponse) { - return 'No usable SEO actionability response was produced.' - } - - if (analysis.hasImpact) parts.push('it prioritizes by impact or effort') - if (analysis.hasMetrics) parts.push('it ties changes to metrics') - if (analysis.hasAction) parts.push('it gives actionable implementation steps') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? 
`This SEO plan is strong because ${parts.join(', ')}.` : 'The response is present but still lacks implementable detail.'}` - }) export const seoAgent = new Agent({ id: 'seo-agent', diff --git a/src/mastra/agents/socialMediaAgent.ts b/src/mastra/agents/socialMediaAgent.ts index e5da0be2..4511bc37 100644 --- a/src/mastra/agents/socialMediaAgent.ts +++ b/src/mastra/agents/socialMediaAgent.ts @@ -1,15 +1,4 @@ import { Agent } from '@mastra/core/agent' -import { createScorer } from '@mastra/core/evals' -import { - extractAgentResponseMessages, - extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - getReasoningFromRunOutput, - getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' import { log } from '../config/logger' import { InternalSpans } from '@mastra/core/observability' @@ -18,12 +7,13 @@ import { contentStrategistAgent } from './contentStrategistAgent' import { copywriterAgent } from './copywriterAgent' import { researchAgent } from './researchAgent' import { - baseAgentRequestContextSchema, + //baseAgentRequestContextSchema, getLanguageFromContext, getUserIdFromContext, getRoleFromContext, } from './request-context' import { LibsqlMemory } from '../config/libsql' +import { createSupervisorAgentPatternScorer } from '../scorers/supervisor-scorers' log.info('Initializing Social Media Agent...') @@ -31,206 +21,89 @@ log.info('Initializing Social Media Agent...') * Evaluates whether a social-media response includes platform-aware content, * campaign guidance, and execution-ready details. 
*/
-const socialMediaTaskCompleteScorer = createScorer({
+const socialMediaTaskCompleteScorer = createSupervisorAgentPatternScorer({
     id: 'social-media-task-complete',
     name: 'Social Media Task Completeness',
     description:
         'Checks whether a social-media response includes platform targeting, content direction, and publishing guidance.',
-    type: 'agent',
+    label: 'social media completeness',
+    emptyReason: 'No usable social media response was produced.',
+    weakReason: 'The response is present but still lacks campaign detail.',
+    strongReasonPrefix: 'This social response is strong because',
+    responseLengthThresholds: [
+        { min: 120, weight: 0.15 },
+        { min: 250, weight: 0.1 },
+    ],
+    minParagraphsForStructure: 2,
+    structureWeight: 0.05,
+    reasoningWeight: 0.03,
+    toolWeight: 0.02,
+    userMessageWeight: 0.05,
+    systemMessageWeight: 0.05,
+    signals: [
+        {
+            label: 'it specifies the target platform',
+            // `\bx\b` instead of a bare `x`: the unanchored alternative
+            // matched any response containing the letter x ("text", "next",
+            // "example"), so this signal fired on almost every response.
+            regex: /linkedin|twitter|\bx\b|instagram|facebook|tiktok/i,
+            weight: 0.2,
+        },
+        {
+            label: 'it uses channel-native content formats',
+            regex: /post|thread|carousel|reel|story|caption/i,
+            weight: 0.2,
+        },
+        {
+            label: 'it includes cadence or posting guidance',
+            regex: /schedule|posting time|calendar|cadence/i,
+            weight: 0.15,
+        },
+        {
+            label: 'it includes hooks, CTAs, or engagement guidance',
+            regex: /cta|hashtag|engagement|hook/i,
+            weight: 0.15,
+        },
+    ],
 })
-    .preprocess(({ run }) => {
-        const userMessage = getUserMessageFromRunInput(run.input)
-        const inputMessages = extractInputMessages(run.input)
-        const systemMessages = getSystemMessagesFromRunInput(run.input)
-        const systemPrompt = getCombinedSystemPrompt(run.input)
-        const response = getAssistantMessageFromRunOutput(run.output)
-        const responseMessages = extractAgentResponseMessages(run.output)
-        const reasoning = getReasoningFromRunOutput(run.output)
-        const { tools, toolCallInfos } = extractToolCalls(run.output)
-
-        return {
-            userMessage,
-            inputMessages,
-            systemMessages,
-            systemPrompt,
-            response,
-
responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasPlatform: - /linkedin|twitter|x|instagram|facebook|tiktok/i.test(responseText), - hasFormat: - /post|thread|carousel|reel|story|caption/i.test(responseText), - hasCadence: - /schedule|posting time|calendar|cadence/i.test(responseText), - hasEngagement: - /cta|hashtag|engagement|hook/i.test(responseText), - hasStructure: - /^#{1,6}\s|^[-*]\s|^\d+\.\s/m.test(responseText) || - responseText.split(/\n\s*\n/).filter(Boolean).length >= 2, - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) { - return 0 - } - - let score = 0 - if (analysis.responseLength >= 120) score += 0.15 - if (analysis.responseLength >= 250) score += 0.1 - if (analysis.hasPlatform) score += 0.2 - if (analysis.hasFormat) score += 0.2 - if (analysis.hasCadence) score += 0.15 - if (analysis.hasEngagement) score += 0.15 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - if (analysis.hasStructure) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - const parts: string[] = [] - - if (!analysis?.hasResponse) { - return 'No usable social media response was produced.' 
- } - - if (analysis.hasPlatform) parts.push('it specifies the target platform') - if (analysis.hasFormat) parts.push('it uses channel-native content formats') - if (analysis.hasCadence) parts.push('it includes cadence or posting guidance') - if (analysis.hasEngagement) parts.push('it includes hooks, CTAs, or engagement guidance') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This social response is strong because ${parts.join(', ')}.` : 'The response is present but still lacks campaign detail.'}` - }) /** * Evaluates whether the social-media response is campaign-ready with clear * channel fit, hooks, and execution guidance. */ -const socialMediaExecutionScorer = createScorer({ +const socialMediaExecutionScorer = createSupervisorAgentPatternScorer({ id: 'social-media-execution-readiness', name: 'Social Media Execution Readiness', description: 'Checks whether a social-media response contains usable channel tactics, hooks, and next actions.', - type: 'agent', + label: 'social media execution', + emptyReason: 'No usable social media execution plan was produced.', + weakReason: 'The response is present but still needs execution detail.', + strongReasonPrefix: 'This execution plan is strong because', + responseLengthThresholds: [ + { min: 120, weight: 0.15 }, + { min: 220, weight: 0.1 }, + ], + structureWeight: 0.05, + reasoningWeight: 0.03, + toolWeight: 0.02, + userMessageWeight: 0.05, + systemMessageWeight: 0.05, + signals: [ + { + label: 'it opens with a hook or CTA', + regex: /hook|opening line|cta|call to action/i, + weight: 0.25, + }, + { + label: 'it includes cadence guidance', + regex: /cadence|schedule|posting time|weekly|daily/i, + weight: 0.15, + }, + { + label: 'it ties the plan to engagement outcomes', + regex: /engagement|reach|click|reply|save|share/i, + weight: 0.2, + }, + ], }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const 
systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasHook: /hook|opening line|cta|call to action/i.test(responseText), - hasCadence: /cadence|schedule|posting time|weekly|daily/i.test(responseText), - hasEngagement: - /engagement|reach|click|reply|save|share/i.test(responseText), - hasStructure: - /^#{1,6}\s|^[-*]\s|^\d+\.\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) { - return 0 - } - - let score = 0 - if (analysis.responseLength >= 120) score += 0.15 - if (analysis.responseLength >= 220) score += 0.1 - if (analysis.hasHook) score += 0.25 - if (analysis.hasCadence) score += 0.15 - if (analysis.hasEngagement) score += 0.2 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - if (analysis.hasStructure) score += 0.05 - - 
return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - const parts: string[] = [] - - if (!analysis?.hasResponse) { - return 'No usable social media execution plan was produced.' - } - - if (analysis.hasHook) parts.push('it opens with a hook or CTA') - if (analysis.hasCadence) parts.push('it includes cadence guidance') - if (analysis.hasEngagement) parts.push('it ties the plan to engagement outcomes') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This execution plan is strong because ${parts.join(', ')}.` : 'The response is present but still needs execution detail.'}` - }) export const socialMediaAgent = new Agent({ id: 'social-media-agent', diff --git a/src/mastra/agents/stockAnalysisAgent.ts b/src/mastra/agents/stockAnalysisAgent.ts index e0d73757..cfd4951b 100644 --- a/src/mastra/agents/stockAnalysisAgent.ts +++ b/src/mastra/agents/stockAnalysisAgent.ts @@ -18,7 +18,6 @@ import { } from '../tools/polygon-tools' import { googleFinanceTool } from '../tools/serpapi-academic-local.tool' //import type { GoogleGenerativeAIProviderOptions } from '@ai-sdk/google' -import { TokenLimiterProcessor } from '@mastra/core/processors' import { InternalSpans } from '@mastra/core/observability' import { getLanguageFromContext, @@ -119,7 +118,7 @@ export const stockAnalysisAgent = new Agent({ internal: InternalSpans.ALL, }, }, - outputProcessors: [new TokenLimiterProcessor(1048576)], + // outputProcessors: [new TokenLimiterProcessor(1048576)], maxRetries: 5, defaultOptions: { autoResumeSuspendedTools: true, diff --git a/src/mastra/agents/supervisor-agent.ts b/src/mastra/agents/supervisor-agent.ts index a664c995..b347a521 100644 --- a/src/mastra/agents/supervisor-agent.ts +++ b/src/mastra/agents/supervisor-agent.ts @@ -1,17 +1,7 @@ import { Agent } from '@mastra/core/agent' -import { createScorer } from '@mastra/core/evals' -import { - extractAgentResponseMessages, - 
extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - getReasoningFromRunOutput, - getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' import { InternalSpans } from '@mastra/core/observability' +import { browserAgent } from './browserAgent' import { researchAgent } from './researchAgent' import { copywriterAgent } from './copywriterAgent' import { libsqlgraphQueryTool, LibsqlMemory, libsqlQueryTool, libsqlvector } from '../config/libsql' @@ -29,6 +19,7 @@ import { } from './request-context' import { embed } from 'ai'; import { ModelRouterEmbeddingModel } from '@mastra/core/llm'; +import { createSupervisorAgentPatternScorer } from '../scorers/supervisor-scorers' const workspace = new Workspace({ id: 'supervisor-workspace', @@ -66,238 +57,95 @@ const workspace = new Workspace({ * Evaluates whether the supervisor produced a complete research-backed final answer * instead of stopping at a partial delegation summary. 
*/ -const supervisorTaskCompleteScorer = createScorer({ +const supervisorTaskCompleteScorer = createSupervisorAgentPatternScorer({ id: 'supervisor-task-complete', name: 'Supervisor Task Completeness', description: 'Checks whether the supervisor returned a structured, substantial, research-backed final response.', - type: 'agent', + label: 'supervisor completeness', + emptyReason: 'No usable supervisor response was produced.', + weakReason: 'The response is present but lacks several synthesis signals.', + strongReasonPrefix: 'This supervisor response is strong because', + responseLengthThresholds: [ + { min: 220, weight: 0.15 }, + { min: 600, weight: 0.15 }, + ], + minParagraphsForStructure: 3, + structureWeight: 0.15, + reasoningWeight: 0.1, + toolWeight: 0.05, + userMessageWeight: 0.05, + systemMessageWeight: 0.05, + signals: [ + { + label: 'it starts with a direct synthesis', + regex: /summary|executive summary|top line|bottom line|direct answer|recommend/i, + weight: 0.05, + }, + { + label: 'it includes evidence or source anchors', + regex: /source|sources|citation|citations|http|www\.|\b20\d{2}\b/i, + weight: 0.15, + }, + { + label: 'it ends with next steps or follow-up guidance', + regex: /next step|next steps|action|follow-up|open question/i, + weight: 0.05, + }, + { + label: 'it acknowledges uncertainty or caveats', + regex: /risk|caveat|uncertain|unknown|assumption/i, + weight: 0.05, + }, + ], }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - 
systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? responseMessages.join('\n')).trim() - const paragraphCount = responseText.split(/\n\s*\n/).filter(Boolean).length - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - paragraphCount, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasEvidence: - /source|sources|citation|citations|http|www\.|\b20\d{2}\b/i.test( - responseText - ), - hasStructure: - /^#{1,6}\s|^[-*]\s|^\d+\.\s/m.test(responseText) || paragraphCount >= 3, - hasDirectAnswer: - /summary|executive summary|top line|bottom line|direct answer|recommend/i.test( - responseText - ), - hasNextSteps: - /next step|next steps|action|follow-up|open question/i.test(responseText), - hasCaveat: - /risk|caveat|uncertain|unknown|assumption/i.test(responseText), - mentionsSynthesis: - /synthes|delegate|research|writing/i.test(responseText) || - /synthes|delegate|research|writing/i.test(systemPrompt), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) { - return 0 - } - - let score = 0 - if (analysis.hasUserMessage) score += 0.05 - if (analysis.systemMessageCount > 0) score += 0.05 - if (analysis.responseLength >= 220) score += 0.15 - if (analysis.responseLength >= 600) score += 0.15 - if (analysis.paragraphCount >= 3) score += 0.15 - if (analysis.hasReasoning) score += 0.1 - if (analysis.toolCount > 0) score += 0.05 - if 
(analysis.hasEvidence) score += 0.15 - if (analysis.hasStructure) score += 0.05 - if (analysis.hasDirectAnswer) score += 0.05 - if (analysis.hasNextSteps) score += 0.05 - if (analysis.hasCaveat) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - const parts: string[] = [] - - if (!analysis?.hasResponse) { - return 'No usable supervisor response was produced.' - } - - if (analysis.hasDirectAnswer) parts.push('it starts with a direct synthesis') - if (analysis.hasEvidence) parts.push('it includes evidence or source anchors') - if (analysis.hasNextSteps) parts.push('it ends with next steps or follow-up guidance') - if (analysis.hasCaveat) parts.push('it acknowledges uncertainty or caveats') - if (analysis.hasReasoning) parts.push('it shows reasoning support') - if (analysis.toolCount > 0) parts.push(`it used ${analysis.toolCount} tool call(s)`) - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This supervisor response is strong because ${parts.join(', ')}.` : 'The response is present but lacks several synthesis signals.'}` - }) /** * Evaluates whether the supervisor delivered a user-ready synthesis with a * direct answer, actionable guidance, and explicit caveats or next steps. 
*/ -const supervisorSynthesisScorer = createScorer({ +const supervisorSynthesisScorer = createSupervisorAgentPatternScorer({ id: 'supervisor-synthesis-readiness', name: 'Supervisor Synthesis Readiness', description: 'Checks whether the supervisor response is actionable, synthesized, and ready for the user without another iteration.', - type: 'agent', + label: 'supervisor synthesis', + emptyReason: 'No usable supervisor response was produced.', + weakReason: 'The response is present but lacks several synthesis signals.', + strongReasonPrefix: 'This supervisor response is strong because', + responseLengthThresholds: [ + { min: 220, weight: 0.2 }, + { min: 600, weight: 0.1 }, + ], + minParagraphsForStructure: 3, + structureWeight: 0.15, + reasoningWeight: 0.1, + toolWeight: 0.05, + userMessageWeight: 0.05, + systemMessageWeight: 0.05, + signals: [ + { + label: 'the answer starts with a direct synthesis', + regex: /summary|in short|bottom line|recommend|recommended/i, + weight: 0.05, + }, + { + label: 'it includes evidence or source anchors', + regex: /source|sources|citation|citations|http|www\.|\b20\d{2}\b/i, + weight: 0.15, + }, + { + label: 'it ends with next steps or follow-up guidance', + regex: /next step|next steps|action|follow-up/i, + weight: 0.05, + }, + { + label: 'it acknowledges uncertainty or caveats', + regex: /risk|caveat|uncertain|unknown|assumption/i, + weight: 0.05, + }, + ], }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - 
systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? responseMessages.join('\n')).trim() - const paragraphCount = responseText.split(/\n\s*\n/).filter(Boolean).length - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - paragraphCount, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasEvidence: - /source|sources|citation|citations|http|www\.|\b20\d{2}\b/i.test( - responseText - ), - hasStructure: - /^#{1,6}\s|^[-*]\s|^\d+\.\s/m.test(responseText) || paragraphCount >= 3, - hasDirectAnswer: - /summary|in short|bottom line|recommend|recommended/i.test(responseText), - hasNextSteps: - /next step|next steps|action|follow-up/i.test(responseText), - hasCaveat: - /risk|caveat|uncertain|unknown|assumption/i.test(responseText), - mentionsDelegation: - /research|writing|delegate|synthes/i.test(responseText) || - /research|writing|delegate|synthes/i.test(systemPrompt), - hasSupportSignal: Boolean(responseText.trim()), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) { - return 0 - } - - let score = 0 - if (analysis.hasUserMessage) score += 0.05 - if (analysis.systemMessageCount > 0) score += 0.05 - if (analysis.responseLength >= 220) score += 0.2 - if (analysis.responseLength >= 600) score += 0.1 - if (analysis.paragraphCount >= 3) score += 0.15 - if (analysis.hasReasoning) score += 0.1 - if (analysis.toolCount > 0) score += 0.05 - if 
(analysis.hasEvidence) score += 0.15 - if (analysis.hasStructure) score += 0.05 - if (analysis.hasDirectAnswer) score += 0.05 - if (analysis.hasNextSteps) score += 0.05 - if (analysis.hasCaveat) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - const parts: string[] = [] - - if (!analysis?.hasResponse) { - return 'No usable supervisor response was produced.' - } - - if (analysis.hasDirectAnswer) parts.push('the answer starts with a direct synthesis') - if (analysis.hasEvidence) parts.push('it includes evidence or source anchors') - if (analysis.hasNextSteps) parts.push('it ends with next steps or follow-up guidance') - if (analysis.hasCaveat) parts.push('it acknowledges uncertainty or caveats') - if (analysis.hasReasoning) parts.push('it shows reasoning support') - if (analysis.toolCount > 0) parts.push(`it used ${analysis.toolCount} tool call(s)`) - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This supervisor response is strong because ${parts.join(', ')}.` : 'The response is present but lacks several synthesis signals.'}` - }) export const supervisorAgent = new Agent({ id: 'supervisor-agent', @@ -318,13 +166,14 @@ You coordinate research and writing tasks using specialized agents. Available resources: - researchAgent: Gathers factual data and sources (returns bullet points) +- browserAgent: Verifies live pages, browser state, and web claims when static research is not enough - writing-agent: Transforms research into well-structured articles (returns full paragraphs) - judge: Evaluates the quality and completeness of the supervisor agent's output Delegation strategy: -1. For research requests: Delegate to research-agent first to gather facts +1. For research requests: Delegate to research-agent first to gather facts, and use browserAgent when live verification, page inspection, or browser-state evidence would materially improve confidence 2. 
For writing requests: Delegate to writing-agent with any available research context -3. For comprehensive reports: Delegate to research-agent first, then writing-agent +3. For comprehensive reports: Delegate to research-agent first, then writing-agent, and pull in browserAgent only for high-value verification 4. Always ensure you have gathered sufficient information before producing final output Success criteria: @@ -340,20 +189,18 @@ Final answer contract: Operating rules: - Prefer the minimum number of delegations needed for a trustworthy answer. +- Use browserAgent only when live verification will materially improve the answer; do not browse by default. - Preserve user language when possible. - If evidence is weak, say so explicitly instead of overcommitting. - Do not return raw delegation summaries as the final answer; convert them into a single coherent response.`, } }, - model: { - url: "https://api.kilo.ai/api/gateway", - id:'kilo/x-ai/grok-code-fast-1:optimized:free', - apiKey: process.env.KILO_API_KEY, - }, + model: 'google/gemma-4-31b-it:free', tools: {libsqlgraphQueryTool, libsqlQueryTool, }, agents: { researchAgent, + browserAgent, copywriterAgent, }, memory: LibsqlMemory, @@ -365,19 +212,21 @@ Operating rules: }, }, defaultOptions: { - maxSteps: 10, + maxSteps: 20, providerOptions: { anthropic: { sendReasoning: true, thinking: { type: 'adaptive', }, + cacheControl: { type: 'ephemeral' } }, google: { thinkingConfig: { includeThoughts: true, thinkingLevel: 'medium', }, + responseModalities: ['TEXT', 'IMAGE'], mediaResolution: 'MEDIA_RESOLUTION_MEDIUM', } satisfies GoogleLanguageModelOptions, openai: { @@ -440,13 +289,23 @@ Operating rules: if (context.primitiveId === 'researchAgent') { return { proceed: true, - modifiedPrompt: `${context.prompt}\n\nReturn concise research notes with sources, dated evidence, unresolved gaps, and the most decision-relevant findings first. 
Focus on recent developments from 2024-2026 unless the user explicitly asks for historical coverage.`, + modifiedPrompt: `${context.prompt}\n\nReturn concise research notes with sources, dated evidence, unresolved gaps, and the most decision-relevant findings first. If live page verification or browser-state evidence would materially improve confidence, explicitly say that browserAgent should verify it. Focus on recent developments from 2024-2026 unless the user explicitly asks for historical coverage.`, modifiedInstructions: 'Act as a senior research analyst. Prioritize evidence quality, source attribution, dated findings, and unresolved gaps. Return only research-ready material for synthesis.', modifiedMaxSteps: 8, } } + if (context.primitiveId === 'browserAgent') { + return { + proceed: true, + modifiedPrompt: `${context.prompt}\n\nUse deterministic browser verification to confirm live claims, page behavior, or browser-state details. Prefer the minimum navigation needed, capture concrete evidence such as URLs, timestamps, visible page text, or interaction results, and clearly separate verified facts from anything still unresolved.`, + modifiedInstructions: + 'Act as a deterministic browser verification specialist. 
Verify only what matters, avoid exploratory browsing, and return concise evidence that the supervisor can synthesize for the user.', + modifiedMaxSteps: 6, + } + } + if (context.primitiveId === 'copywriterAgent') { return { proceed: true, @@ -507,4 +366,4 @@ Operating rules: suppressFeedback: false, // Show feedback from the scorer }, }, -}) \ No newline at end of file +}) diff --git a/src/mastra/agents/translationAgent.ts b/src/mastra/agents/translationAgent.ts index 8dc669a2..e07041ac 100644 --- a/src/mastra/agents/translationAgent.ts +++ b/src/mastra/agents/translationAgent.ts @@ -1,15 +1,4 @@ import { Agent } from '@mastra/core/agent' -import { createScorer } from '@mastra/core/evals' -import { - extractAgentResponseMessages, - extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - getReasoningFromRunOutput, - getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' import { log } from '../config/logger' import { InternalSpans } from '@mastra/core/observability' @@ -21,6 +10,7 @@ import { getRoleFromContext, } from './request-context' import { LibsqlMemory } from '../config/libsql' +import { createSupervisorAgentPatternScorer } from '../scorers/supervisor-scorers' log.info('Initializing Translation Agent...') @@ -28,204 +18,84 @@ log.info('Initializing Translation Agent...') * Evaluates whether a translation response includes the translated result, * localization reasoning, and quality-assurance guidance. 
*/ -const translationTaskCompleteScorer = createScorer({ +const translationTaskCompleteScorer = createSupervisorAgentPatternScorer({ id: 'translation-task-complete', name: 'Translation Task Completeness', description: 'Checks whether a translation response covers translation output, cultural adaptation, and review notes.', - type: 'agent', + label: 'translation completeness', + emptyReason: 'No usable translation response was produced.', + weakReason: 'The response is present but still needs translation detail.', + strongReasonPrefix: 'This translation response is strong because', + responseLengthThresholds: [ + { min: 120, weight: 0.15 }, + { min: 250, weight: 0.15 }, + ], + minParagraphsForStructure: 2, + structureWeight: 0.05, + reasoningWeight: 0.03, + toolWeight: 0.02, + userMessageWeight: 0.05, + systemMessageWeight: 0.05, + signals: [ + { + label: 'it identifies the source or target language', + regex: /source language|target language|translate|translation/i, + weight: 0.25, + }, + { + label: 'it explains localization or tone choices', + regex: /localization|cultural|tone|audience/i, + weight: 0.2, + }, + { + label: 'it includes QA or proofreading guidance', + regex: /quality|review|qa|proofread|validation/i, + weight: 0.15, + }, + ], }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - 
userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasLanguagePair: - /source language|target language|translate|translation/i.test(responseText), - hasLocalization: - /localization|cultural|tone|audience/i.test(responseText), - hasQA: - /quality|review|qa|proofread|validation/i.test(responseText), - hasStructure: - /^#{1,6}\s|^[-*]\s|^\d+\.\s/m.test(responseText) || - responseText.split(/\n\s*\n/).filter(Boolean).length >= 2, - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) { - return 0 - } - - let score = 0 - if (analysis.responseLength >= 120) score += 0.15 - if (analysis.responseLength >= 250) score += 0.15 - if (analysis.hasLanguagePair) score += 0.25 - if (analysis.hasLocalization) score += 0.2 - if (analysis.hasQA) score += 0.15 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - if (analysis.hasStructure) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - const parts: string[] = [] - - if (!analysis?.hasResponse) { - return 'No usable translation response was produced.' 
- } - - if (analysis.hasLanguagePair) parts.push('it identifies the source or target language') - if (analysis.hasLocalization) parts.push('it explains localization or tone choices') - if (analysis.hasQA) parts.push('it includes QA or proofreading guidance') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This translation response is strong because ${parts.join(', ')}.` : 'The response is present but still needs translation detail.'}` - }) /** * Evaluates whether the translation response is delivery-ready with a clear * translated result and notes on localization trade-offs. */ -const translationDeliveryScorer = createScorer({ +const translationDeliveryScorer = createSupervisorAgentPatternScorer({ id: 'translation-delivery-readiness', name: 'Translation Delivery Readiness', description: 'Checks whether a translation response contains a usable translation plus localization notes or alternatives.', - type: 'agent', + label: 'translation delivery', + emptyReason: 'No usable translation delivery response was produced.', + weakReason: 'The response is present but still lacks delivery-ready detail.', + strongReasonPrefix: 'This delivery response is strong because', + responseLengthThresholds: [ + { min: 100, weight: 0.15 }, + { min: 200, weight: 0.1 }, + ], + structureWeight: 0.05, + reasoningWeight: 0.03, + toolWeight: 0.02, + userMessageWeight: 0.05, + systemMessageWeight: 0.05, + signals: [ + { + label: 'it includes the translated or localized text', + regex: /translation|translated text|localized version|target text/i, + weight: 0.25, + }, + { + label: 'it adds notes about nuance or terminology', + regex: /note|alternative|nuance|idiom|terminology/i, + weight: 0.2, + }, + { + label: 'it reflects the target audience or locale', + regex: /audience|tone|region|locale/i, + weight: 0.15, + }, + ], }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const 
systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasTranslation: - /translation|translated text|localized version|target text/i.test(responseText), - hasNotes: - /note|alternative|nuance|idiom|terminology/i.test(responseText), - hasLocale: - /audience|tone|region|locale/i.test(responseText), - hasStructure: - /^#{1,6}\s|^[-*]\s|^\d+\.\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) { - return 0 - } - - let score = 0 - if (analysis.responseLength >= 100) score += 0.15 - if (analysis.responseLength >= 200) score += 0.1 - if (analysis.hasTranslation) score += 0.25 - if (analysis.hasNotes) score += 0.2 - if (analysis.hasLocale) score += 0.15 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - if (analysis.hasStructure) score += 
0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - const parts: string[] = [] - - if (!analysis?.hasResponse) { - return 'No usable translation delivery response was produced.' - } - - if (analysis.hasTranslation) parts.push('it includes the translated or localized text') - if (analysis.hasNotes) parts.push('it adds notes about nuance or terminology') - if (analysis.hasLocale) parts.push('it reflects the target audience or locale') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This delivery response is strong because ${parts.join(', ')}.` : 'The response is present but still lacks delivery-ready detail.'}` - }) export const translationAgent = new Agent({ id: 'translation-agent', diff --git a/src/mastra/agents/weather-agent.ts b/src/mastra/agents/weather-agent.ts index a76798c5..9bee10e0 100644 --- a/src/mastra/agents/weather-agent.ts +++ b/src/mastra/agents/weather-agent.ts @@ -1,10 +1,7 @@ import { Agent } from '@mastra/core/agent' -import type { RequestContext } from '@mastra/core/request-context' - import { libsqlChunker,} from '../tools/document-chunking.tool' import { weatherTool } from '../tools/weather-tool' import type { GoogleGenerativeAIProviderOptions } from '@ai-sdk/google' -import { TokenLimiterProcessor } from '@mastra/core/processors' import { InternalSpans } from '@mastra/core/observability' import { mainWorkspace } from '../workspaces' import type { AgentRequestContext } from './request-context' diff --git a/src/mastra/agents/webResearchAgent.ts b/src/mastra/agents/webResearchAgent.ts index cdd33d03..bbf00c4a 100644 --- a/src/mastra/agents/webResearchAgent.ts +++ b/src/mastra/agents/webResearchAgent.ts @@ -1,8 +1,5 @@ import type { GoogleLanguageModelOptions } from '@ai-sdk/google' import { Agent } from '@mastra/core/agent' -import { - TokenLimiterProcessor, -} from '@mastra/core/processors' import type { RequestContext } from 
'@mastra/core/request-context' import { google } from '../config/google' diff --git a/src/mastra/auth.ts b/src/mastra/auth.ts index 6aaa396a..1dd998ef 100644 --- a/src/mastra/auth.ts +++ b/src/mastra/auth.ts @@ -5,6 +5,7 @@ import { betterAuth, type Auth, type BetterAuthOptions } from 'better-auth' import { admin, multiSession, oAuthProxy, oneTap, username } from 'better-auth/plugins' import { apiKey } from '@better-auth/api-key' import { Kysely, type ColumnType } from 'kysely' +import { log } from './config/logger' type AuthDateColumn = ColumnType; type AuthNullableDateColumn = ColumnType< @@ -111,19 +112,52 @@ interface BetterAuthDatabase { } const isDevelopment = process.env.NODE_ENV !== 'production' -const githubClientId = process.env.GITHUB_CLIENT_ID?.trim() -const githubClientSecret = process.env.GITHUB_CLIENT_SECRET?.trim() + +function trimTrailingSlash(url: string): string { + return url.replace(/\/+$/, '') +} + +/** + * Normalizes legacy OAuth callback env values onto Better Auth's default + * Next.js callback route so older local env files do not break Google sign-in. + */ +function resolveGoogleRedirectUri(baseUrl: string): string { + const configuredRedirectUri = process.env.GOOGLE_CLIENT_CALLBACK_URL?.trim() + const defaultRedirectUri = `${trimTrailingSlash(baseUrl)}/api/auth/callback/google` + + if (!configuredRedirectUri) { + return defaultRedirectUri + } + + if (/\/api\/callback\/?$/.test(configuredRedirectUri)) { + log.warn('Normalizing legacy Google callback URL', { + configuredRedirectUri, + normalizedRedirectUri: defaultRedirectUri, + }) + return defaultRedirectUri + } + + return configuredRedirectUri +} + +const baseURL = + process.env.BETTER_AUTH_URL?.trim() ?? + process.env.NEXT_PUBLIC_BETTER_AUTH_URL?.trim() ?? + (isDevelopment ? 
'http://localhost:3000' : undefined) const trustedOrigins = [ - process.env.BETTER_AUTH_TRUSTED_ORIGIN, + process.env.BETTER_AUTH_TRUSTED_ORIGIN?.trim(), + process.env.NEXT_PUBLIC_BETTER_AUTH_URL?.trim(), + baseURL, isDevelopment ? 'http://localhost:3000' : undefined, isDevelopment ? 'http://127.0.0.1:3000' : undefined, -].filter((origin): origin is string => Boolean(origin)) - -const baseURL = - process.env.BETTER_AUTH_URL ?? (isDevelopment ? 'http://localhost:3000' : undefined) +].filter((origin, index, values): origin is string => Boolean(origin) && values.indexOf(origin) === index) const socialProviders: BetterAuthOptions['socialProviders'] = {} +const githubClientId = process.env.GITHUB_CLIENT_ID?.trim() +const githubClientSecret = process.env.GITHUB_CLIENT_SECRET?.trim() +const googleClientId = process.env.GOOGLE_CLIENT_ID?.trim() +const googleClientSecret = process.env.GOOGLE_CLIENT_SECRET?.trim() if (githubClientId && githubClientSecret) { socialProviders.github = { @@ -132,6 +166,14 @@ if (githubClientId && githubClientSecret) { } } +if (googleClientId && googleClientSecret && baseURL) { + socialProviders.google = { + clientId: googleClientId, + clientSecret: googleClientSecret, + redirectURI: resolveGoogleRedirectUri(baseURL), + } +} + const authDatabase = new Kysely({ dialect: new LibsqlDialect({ url: process.env.TURSO_DATABASE_URL ?? process.env.TURSO_URL ?? 'file:./database.db', @@ -176,17 +218,7 @@ const authOptions: BetterAuthOptions = { db: authDatabase, type: 'sqlite', }, - socialProviders: { - github: { - clientId: process.env.GITHUB_CLIENT_ID ?? '', - clientSecret: process.env.GITHUB_CLIENT_SECRET ?? '', - }, - google: { - clientId: process.env.GOOGLE_CLIENT_ID ?? '', - clientSecret: process.env.GOOGLE_CLIENT_SECRET ?? '', - redirectURI: process.env.GOOGLE_CLIENT_CALLBACK_URL ?? undefined, - } - }, + socialProviders, baseURL: process.env.BETTER_AUTH_URL ?? 'http://localhost:3000', secret: process.env.BETTER_AUTH_SECRET ?? 
'supersecret', plugins: [ @@ -221,4 +253,3 @@ export const mastraAuth = new MastraAuthBetterAuth({ signUpEnabled: true, }) - diff --git a/src/mastra/browsers.ts b/src/mastra/browsers.ts index 1adbae12..e248a7e7 100644 --- a/src/mastra/browsers.ts +++ b/src/mastra/browsers.ts @@ -3,55 +3,367 @@ import { StagehandBrowser } from '@mastra/stagehand' import { log } from './config/logger' +type BrowserScope = 'shared' | 'thread' +type BrowserConnectionMode = 'browserbase' | 'cdp' +type ScreencastFormat = 'jpeg' | 'png' +type StagehandEnvironment = 'LOCAL' | 'BROWSERBASE' + +const DEFAULT_CHROME_CDP_URL = 'http://127.0.0.1:9222' +const DEFAULT_BROWSER_TIMEOUT_MS = 30000 +const DEFAULT_STAGEHAND_DOM_SETTLE_TIMEOUT_MS = 5000 +const DEFAULT_STAGEHAND_MODEL = 'google/gemini-3.1-flash-lite-preview' +const DEFAULT_VIEWPORT_WIDTH = 1440 +const DEFAULT_VIEWPORT_HEIGHT = 900 + +interface BrowserRuntimeProfile { + browserLabel: string + connectionMode: BrowserConnectionMode + scope: BrowserScope + headless: boolean + timeoutMs: number + viewport: { + width: number + height: number + } + screencast: { + format: ScreencastFormat + quality: number + maxWidth: number + maxHeight: number + everyNthFrame: number + } + cdpUrl?: string + environment?: StagehandEnvironment +} + +/** + * Reads the first non-empty environment value from the provided keys. + */ +function readStringEnv( + keys: readonly string[], + fallback?: string +): string | undefined { + for (const key of keys) { + const value = process.env[key]?.trim() + + if (value) { + return value + } + } + + return fallback +} + +/** + * Reads a positive numeric environment variable with a safe fallback. + */ +function readNumberEnv(keys: readonly string[], fallback: number): number { + const rawValue = readStringEnv(keys) + + if (!rawValue) { + return fallback + } + + const parsedValue = Number(rawValue) + return Number.isFinite(parsedValue) && parsedValue > 0 + ? 
parsedValue + : fallback +} + +/** + * Reads a boolean-like environment variable using common true/false forms. + */ +function readBooleanEnv(keys: readonly string[], fallback: boolean): boolean { + const rawValue = readStringEnv(keys)?.toLowerCase() + + if (!rawValue) { + return fallback + } + + if (['1', 'true', 'yes', 'on'].includes(rawValue)) { + return true + } + + if (['0', 'false', 'no', 'off'].includes(rawValue)) { + return false + } + + return fallback +} + +/** + * Reads the screencast image format while preserving Mastra-supported values. + */ +function readScreencastFormatEnv( + keys: readonly string[], + fallback: ScreencastFormat +): ScreencastFormat { + const rawValue = readStringEnv(keys)?.toLowerCase() + + return rawValue === 'jpeg' || rawValue === 'png' ? rawValue : fallback +} + +/** + * Reads the Stagehand execution environment. + */ +function readStagehandEnvironmentEnv( + keys: readonly string[], + fallback: StagehandEnvironment +): StagehandEnvironment { + const rawValue = readStringEnv(keys)?.toUpperCase() + + return rawValue === 'LOCAL' || rawValue === 'BROWSERBASE' + ? rawValue + : fallback +} + +/** + * Reads the Stagehand verbosity value while preserving supported levels only. + */ +function readStagehandVerboseEnv( + keys: readonly string[], + fallback: 0 | 1 | 2 +): 0 | 1 | 2 { + const parsedValue = readNumberEnv(keys, fallback) + + return parsedValue === 0 || parsedValue === 1 || parsedValue === 2 + ? parsedValue + : fallback +} + +/** + * Resolves the Chrome CDP endpoint used for deterministic and Stagehand + * browser connections. + */ function resolveChromeCdpUrl(): string { return ( - process.env.CHROME_CDP_URL?.trim() ?? - process.env.CHROME_REMOTE_DEBUGGING_URL?.trim() ?? - 'http://127.0.0.1:9222' + readStringEnv( + ['CHROME_CDP_URL', 'CHROME_REMOTE_DEBUGGING_URL'], + DEFAULT_CHROME_CDP_URL + ) ?? 
DEFAULT_CHROME_CDP_URL ) } -const sharedViewport = { - width: 1440, - height: 900, +/** + * Creates lifecycle hooks that log browser readiness and teardown using the + * same production metadata across browser providers. + */ +function createBrowserLifecycleHooks(profile: BrowserRuntimeProfile) { + const launchTimes = new Map() + + return { + onLaunch: async ({ + browser, + }: { + browser: { + id: string + name: string + provider: string + status: string + headless: boolean + } + }) => { + launchTimes.set(browser.id, Date.now()) + + log.info(`${profile.browserLabel} ready`, { + browserId: browser.id, + browserName: browser.name, + provider: browser.provider, + status: browser.status, + headless: browser.headless, + connectionMode: profile.connectionMode, + environment: profile.environment, + scope: profile.scope, + timeoutMs: profile.timeoutMs, + viewport: profile.viewport, + screencast: profile.screencast, + cdpUrl: profile.cdpUrl, + }) + }, + onClose: async ({ + browser, + }: { + browser: { + id: string + name: string + provider: string + status: string + headless: boolean + } + }) => { + const launchedAt = launchTimes.get(browser.id) + log.info(`${profile.browserLabel} closed`, { + browserId: browser.id, + browserName: browser.name, + provider: browser.provider, + status: browser.status, + headless: browser.headless, + connectionMode: profile.connectionMode, + environment: profile.environment, + scope: profile.scope, + sessionDurationMs: + typeof launchedAt === 'number' ? 
Date.now() - launchedAt : undefined, + }) + launchTimes.delete(browser.id) + }, + } } +const chromeCdpUrl = resolveChromeCdpUrl() +const sharedViewport = { + width: readNumberEnv( + ['BROWSER_VIEWPORT_WIDTH', 'AGENT_BROWSER_VIEWPORT_WIDTH'], + DEFAULT_VIEWPORT_WIDTH + ), + height: readNumberEnv( + ['BROWSER_VIEWPORT_HEIGHT', 'AGENT_BROWSER_VIEWPORT_HEIGHT'], + DEFAULT_VIEWPORT_HEIGHT + ), +} const sharedScreencast = { - format: 'png' as const, - quality: 80, - maxWidth: 1440, - maxHeight: 900, + format: readScreencastFormatEnv( + ['BROWSER_SCREENCAST_FORMAT'], + 'png' + ), + quality: readNumberEnv(['BROWSER_SCREENCAST_QUALITY'], 80), + maxWidth: readNumberEnv( + ['BROWSER_SCREENCAST_MAX_WIDTH'], + sharedViewport.width + ), + maxHeight: readNumberEnv( + ['BROWSER_SCREENCAST_MAX_HEIGHT'], + sharedViewport.height + ), + everyNthFrame: readNumberEnv(['BROWSER_SCREENCAST_EVERY_NTH_FRAME'], 2), +} + +const agentBrowserScope = 'shared' as const +const stagehandEnvironment = readStagehandEnvironmentEnv( + ['STAGEHAND_ENV'], + 'LOCAL' +) +const stagehandScope = 'shared' as const +const agentBrowserHeadless = readBooleanEnv( + ['AGENT_BROWSER_HEADLESS', 'BROWSER_HEADLESS'], + false +) +const agentBrowserTimeoutMs = readNumberEnv( + ['AGENT_BROWSER_TIMEOUT_MS', 'BROWSER_TIMEOUT_MS'], + DEFAULT_BROWSER_TIMEOUT_MS +) +const stagehandHeadless = readBooleanEnv( + ['STAGEHAND_HEADLESS', 'BROWSER_HEADLESS'], + false +) +const stagehandTimeoutMs = readNumberEnv( + ['STAGEHAND_TIMEOUT_MS', 'BROWSER_TIMEOUT_MS'], + DEFAULT_BROWSER_TIMEOUT_MS +) + +function resolveStagehandConnectionOptions( + environment: StagehandEnvironment, + scope: 'shared', + cdpUrl: string +) { + if (environment === 'BROWSERBASE') { + const apiKey = readStringEnv(['BROWSERBASE_API_KEY']) + const projectId = readStringEnv(['BROWSERBASE_PROJECT_ID']) + + if (!apiKey || !projectId) { + throw new Error( + 'StagehandBrowser requires BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID when STAGEHAND_ENV=BROWSERBASE.' 
+ ) + } + + return { + apiKey, + connectionMode: 'browserbase' as const, + projectId, + scope, + } + } + + return { + cdpUrl, + connectionMode: 'cdp' as const, + scope, + } +} + +const stagehandConnectionOptions = resolveStagehandConnectionOptions( + stagehandEnvironment, + stagehandScope, + chromeCdpUrl +) + +const agentBrowserProfile: BrowserRuntimeProfile = { + browserLabel: 'deterministic-agent-browser', + cdpUrl: chromeCdpUrl, + connectionMode: 'cdp', + headless: agentBrowserHeadless, + scope: agentBrowserScope, + screencast: sharedScreencast, + timeoutMs: agentBrowserTimeoutMs, + viewport: sharedViewport, +} + +const stagehandBrowserProfile: BrowserRuntimeProfile = { + browserLabel: 'stagehand-browser', + cdpUrl: + stagehandConnectionOptions.connectionMode === 'cdp' ? chromeCdpUrl : undefined, + connectionMode: stagehandConnectionOptions.connectionMode, + environment: stagehandEnvironment, + headless: stagehandHeadless, + scope: stagehandScope, + screencast: sharedScreencast, + timeoutMs: stagehandTimeoutMs, + viewport: sharedViewport, +} + +export const browserRuntimeConfig = { + chromeCdpUrl, + sharedScreencast, + sharedViewport, } export const agentBrowser = new AgentBrowser({ - headless: false, + headless: agentBrowserHeadless, viewport: sharedViewport, - timeout: 30000, - cdpUrl: resolveChromeCdpUrl, - scope: 'shared', + timeout: agentBrowserTimeoutMs, + cdpUrl: () => chromeCdpUrl, + scope: agentBrowserScope, screencast: sharedScreencast, - onLaunch: async ({ browser }) => { - log.info('Shared browser connected to Chrome', { - browserName: browser.name, - browserId: browser.id, - }) - }, - onClose: async ({ browser }) => { - log.info('Shared browser disconnected from Chrome', { - browserName: browser.name, - browserId: browser.id, - }) - }, + ...createBrowserLifecycleHooks(agentBrowserProfile), }) -export const stagehand = new StagehandBrowser({ - headless: false, - model: 'google/gemini-3.1-flash-lite-preview', - selfHeal: true, - env: 'LOCAL', - scope: 
'shared', - verbose: 2, +export const stagehandBrowser = new StagehandBrowser({ + headless: stagehandHeadless, + model: + readStringEnv(['STAGEHAND_MODEL'], DEFAULT_STAGEHAND_MODEL) ?? + DEFAULT_STAGEHAND_MODEL, + selfHeal: readBooleanEnv(['STAGEHAND_SELF_HEAL'], true), + domSettleTimeout: readNumberEnv( + ['STAGEHAND_DOM_SETTLE_TIMEOUT_MS'], + DEFAULT_STAGEHAND_DOM_SETTLE_TIMEOUT_MS + ), + env: stagehandEnvironment, + verbose: readStagehandVerboseEnv(['STAGEHAND_VERBOSE'], 2), viewport: sharedViewport, + timeout: stagehandTimeoutMs, screencast: sharedScreencast, - systemPrompt: 'You can browse the web using natural language. Use stagehand_act to perform actions like "click the login button". Use stagehand_extract to get data from pages, stagehand_observe Discover actionable elements on a page, stagehand_navigate Navigate to a URL, stagehand_tabs Manage browser tabs, stagehand_close Close the browser' + ...stagehandConnectionOptions, + systemPrompt: `You are a production browser operator for high-signal research and verification. + +Use stagehand_navigate to reach the exact target page, stagehand_observe to discover the actionable surface, stagehand_act for natural-language interactions, and stagehand_extract when the caller needs structured facts pulled from the page. + +Operating rules: +- Prefer the smallest set of navigations and interactions needed to verify the claim. +- Treat visible page text, URLs, titles, timestamps, and state transitions as evidence. +- Use tabs intentionally and close irrelevant tabs when they no longer help the task. +- Do not perform destructive, account-changing, or purchase-like actions unless the caller explicitly asks for them. +- If a page blocks progress, report the blocker clearly instead of guessing. 
+- When extracting data, keep the schema tight and separate verified facts from unresolved uncertainty.`, + ...createBrowserLifecycleHooks(stagehandBrowserProfile), }) + +export const stagehand = stagehandBrowser diff --git a/src/mastra/config/libsql.ts b/src/mastra/config/libsql.ts index 5924c12b..49b3e366 100644 --- a/src/mastra/config/libsql.ts +++ b/src/mastra/config/libsql.ts @@ -47,7 +47,7 @@ export const LibsqlMemory = new Memory({ logOutputs: true, }, }, - maxParallelCalls: 5, // Limit parallel embedding calls to avoid rate limits + //maxParallelCalls: 10, // Limit parallel embedding calls to avoid rate limits providerOptions: { google: { outputDimensions: 3072, @@ -121,7 +121,8 @@ export const LibsqlMemory = new Memory({ }) log.info('LibSQLStore and Memory initialized with LibSQLVector support', { - url: process.env.TURSO_DATABASE_URL ?? 'file:./vectors.db', + storage: process.env.TURSO_DATABASE_URL ?? 'file:./database.db', + vector: process.env.TURSO_DATABASE_URL ?? 'file:./vectors.db', // schema: process.env.DB_SCHEMA ?? 'mastra', // maxConnections: parseInt(process.env.DB_MAX_CONNECTIONS ?? 
'20'), memoryOptions: { diff --git a/src/mastra/index.ts b/src/mastra/index.ts index 29165a16..a70debc3 100644 --- a/src/mastra/index.ts +++ b/src/mastra/index.ts @@ -47,6 +47,7 @@ import { researchPaperAgent } from './agents/researchPaperAgent' // Utility Agents import { bgColorAgent } from './agents/bgColorAgent' +import { browserAgent } from './agents/browserAgent' import { calendarAgent } from './agents/calendarAgent' import { noteTakerAgent } from './agents/noteTakerAgent' import { danePackagePublisher } from './agents/package-publisher' @@ -289,6 +290,7 @@ export const mastra = new Mastra({ // Calendar and misc calendarAgent, bgColorAgent, + browserAgent, // Package publisher danePackagePublisher, // Financial Chart Agents diff --git a/src/mastra/networks/AGENTS.md b/src/mastra/networks/AGENTS.md index 90cc5c55..d4931e69 100644 --- a/src/mastra/networks/AGENTS.md +++ b/src/mastra/networks/AGENTS.md @@ -196,7 +196,7 @@ Networks are routing agents that coordinate multiple specialized agents to handl - Preserve context when passing between agents - Log routing decisions for debugging - When a network delegates to child agents, prefer inline `defaultOptions.delegation` hooks in that network file to refine prompts, handle delegation failure feedback, and trim parent message context before handoff. -- Keep network completion checks local to the network file with a network-scoped `createScorer(...)` rather than relying on shared coordinator abstractions. +- Keep network completion checks local to the network file, but prefer building them with shared primitives such as `createSupervisorPatternScorer(...)` from `src/mastra/scorers/supervisor-scorers.ts` so the network keeps auditable local signals without duplicating scorer plumbing. - Prefer multiple local scorers when a network can validly finish through different answer shapes (for example, a broad completeness scorer plus a shorter decision-readiness or execution-readiness scorer). 
- Add an explicit final-answer contract in long coordinator instructions so the network knows what a user-ready synthesis should look like after delegation. - Do **not** rely on `nestedAgents` adapters or broad parent-side casts when wiring child agents. @@ -208,6 +208,7 @@ Networks are routing agents that coordinate multiple specialized agents to handl | Version | Date (UTC) | Changes | | ------- | ---------- | ------------------------------------------------------------- | +| 2.0.5 | 2026-04-15 | Standardized coordinator networks on local `createSupervisorPatternScorer(...)` wrappers so they keep auditable network-specific signals while sharing the common scorer pipeline. | | 2.0.4 | 2026-03-28 | Added the current standard for dual local completion scorers and explicit final-answer contracts in coordinator networks. | | 2.0.3 | 2026-03-27 | Added the current standard for inline network delegation hooks and network-local completion scorers. | | 2.0.2 | 2026-03-17 | Replaced adapter guidance with the current source-level child-agent typing standard (`unknown` public request context, internal runtime parsing). 
| diff --git a/src/mastra/networks/businessIntelligenceNetwork.ts b/src/mastra/networks/businessIntelligenceNetwork.ts index c7323684..374c6b50 100644 --- a/src/mastra/networks/businessIntelligenceNetwork.ts +++ b/src/mastra/networks/businessIntelligenceNetwork.ts @@ -1,18 +1,5 @@ import { Agent } from '@mastra/core/agent' -import { createScorer } from '@mastra/core/evals' -import { - extractAgentResponseMessages, - extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - getReasoningFromRunOutput, - getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' -import { - TokenLimiterProcessor -} from '@mastra/core/processors' + import { dataIngestionAgent } from '../agents/dataIngestionAgent' import { dataTransformationAgent } from '../agents/dataTransformationAgent' import { evaluationAgent } from '../agents/evaluationAgent' @@ -22,6 +9,7 @@ import { researchAgent } from '../agents/researchAgent' import { stockAnalysisAgent } from '../agents/stockAnalysisAgent' import { log } from '../config/logger' import { LibsqlMemory } from '../config/libsql' +import { createSupervisorPatternScorer } from '../scorers/supervisor-scorers' log.info('Initializing Business Intelligence Network...') @@ -29,189 +17,79 @@ log.info('Initializing Business Intelligence Network...') * Checks that the business-intelligence network returns actionable BI output * with findings, metrics, or recommendations. 
*/ -const businessIntelligenceNetworkTaskCompleteScorer = createScorer({ - id: 'business-intelligence-network-task-complete', - name: 'Business Intelligence Network Task Completeness', - description: - 'Checks whether the BI network returned concrete analysis, reporting, or visualization guidance.', - type: 'agent', -}) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? 
responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasBiLanguage: - /kpi|dashboard|analysis|metric|report|forecast|insight|trend|visualization|recommendation/i.test( - responseText - ), - hasStructure: - /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - hasDecision: - /decision|business impact|recommendation|priority|next step/i.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 80) score += 0.2 - if (analysis.responseLength >= 160) score += 0.1 - if (analysis.hasBiLanguage) score += 0.35 - if (analysis.hasStructure) score += 0.15 - if (analysis.hasDecision) score += 0.1 - if (analysis.hasReasoning) score += 0.05 - if (analysis.toolCount > 0) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable BI response was produced.' - - const parts: string[] = [] - if (analysis.hasBiLanguage) parts.push('it includes BI-specific analysis language') - if (analysis.hasStructure) parts.push('it is structured for reporting') - if (analysis.hasDecision) parts.push('it points toward a decision or next step') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? 
`This BI response is strong because ${parts.join(', ')}.` : 'The response is present but still needs analytics detail.'}` +const businessIntelligenceNetworkTaskCompleteScorer = + createSupervisorPatternScorer({ + id: 'business-intelligence-network-task-complete', + name: 'Business Intelligence Network Task Completeness', + description: + 'Checks whether the BI network returned concrete analysis, reporting, or visualization guidance.', + label: 'Business intelligence response', + emptyReason: 'No usable BI response was produced.', + weakReason: 'The response is present but still needs analytics detail.', + strongReasonPrefix: 'This BI response is strong because', + signals: [ + { + label: 'it includes BI-specific analysis language', + regex: + /kpi|dashboard|analysis|metric|report|forecast|insight|trend|visualization|recommendation/i, + weight: 0.35, + }, + { + label: 'it points toward a decision or next step', + regex: /decision|business impact|recommendation|priority|next step/i, + weight: 0.1, + }, + ], + responseLengthThresholds: [ + { min: 80, weight: 0.2 }, + { min: 160, weight: 0.1 }, + ], + minParagraphsForStructure: 999, + structureWeight: 0.15, + reasoningWeight: 0.05, + toolWeight: 0.05, }) /** * Checks that the BI answer is decision-ready with key metrics, findings, and * recommended next actions. 
*/ -const businessIntelligenceNetworkDecisionScorer = createScorer({ - id: 'business-intelligence-network-decision-readiness', - name: 'Business Intelligence Network Decision Readiness', - description: - 'Checks whether the BI response includes metrics, business interpretation, and decision-oriented next steps.', - type: 'agent', -}) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? 
responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasKpi: /kpi|metric|dashboard|forecast|trend/i.test(responseText), - hasDecision: - /business impact|decision|recommend|opportunity|risk/i.test(responseText), - hasAction: /next step|investigate|monitor|act/i.test(responseText), - hasStructure: - /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 160) score += 0.2 - if (analysis.responseLength >= 260) score += 0.1 - if (analysis.hasKpi) score += 0.25 - if (analysis.hasDecision) score += 0.2 - if (analysis.hasAction) score += 0.15 - if (analysis.hasStructure) score += 0.05 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable BI decision response was produced.' - - const parts: string[] = [] - if (analysis.hasKpi) parts.push('it includes KPI or trend language') - if (analysis.hasDecision) parts.push('it frames a business decision or risk') - if (analysis.hasAction) parts.push('it suggests next actions') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? 
`This BI decision response is strong because ${parts.join(', ')}.` : 'The response is present but still needs decision-ready detail.'}` +const businessIntelligenceNetworkDecisionScorer = + createSupervisorPatternScorer({ + id: 'business-intelligence-network-decision-readiness', + name: 'Business Intelligence Network Decision Readiness', + description: + 'Checks whether the BI response includes metrics, business interpretation, and decision-oriented next steps.', + label: 'Business intelligence decision response', + emptyReason: 'No usable BI decision response was produced.', + weakReason: + 'The response is present but still needs decision-ready detail.', + strongReasonPrefix: 'This BI decision response is strong because', + signals: [ + { + label: 'it includes KPI or trend language', + regex: /kpi|metric|dashboard|forecast|trend/i, + weight: 0.25, + }, + { + label: 'it frames a business decision or risk', + regex: /business impact|decision|recommend|opportunity|risk/i, + weight: 0.2, + }, + { + label: 'it suggests next actions', + regex: /next step|investigate|monitor|act/i, + weight: 0.15, + }, + ], + responseLengthThresholds: [ + { min: 160, weight: 0.2 }, + { min: 260, weight: 0.1 }, + ], + minParagraphsForStructure: 999, + structureWeight: 0.05, + reasoningWeight: 0.03, + toolWeight: 0.02, }) export const businessIntelligenceNetwork = new Agent({ @@ -387,14 +265,6 @@ export const businessIntelligenceNetwork = new Agent({ }, options: {}, // tools: { confirmationTool }, - outputProcessors: [ - new TokenLimiterProcessor(128000), - // new BatchPartsProcessor({ - // batchSize: 20, - // maxWaitTime: 100, - // emitOnNonText: true, - // }), - ], defaultOptions: { maxSteps: 20, delegation: { diff --git a/src/mastra/networks/codingTeamNetwork.ts b/src/mastra/networks/codingTeamNetwork.ts index e310ddd8..d69b2781 100644 --- a/src/mastra/networks/codingTeamNetwork.ts +++ b/src/mastra/networks/codingTeamNetwork.ts @@ -1,15 +1,4 @@ import { Agent } from '@mastra/core/agent' 
-import { createScorer } from '@mastra/core/evals' -import { - extractAgentResponseMessages, - extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - getReasoningFromRunOutput, - getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' import type { Processor, ProcessorMessageResult, @@ -31,6 +20,7 @@ import { repoIngestionWorkflow } from '../workflows/repo-ingestion-workflow' import { researchSynthesisWorkflow } from '../workflows/research-synthesis-workflow' import { specGenerationWorkflow } from '../workflows/spec-generation-workflow' import { LibsqlMemory } from '../config/libsql' +import { createSupervisorPatternScorer } from '../scorers/supervisor-scorers' log.info('Initializing Coding Team Network...') @@ -38,187 +28,72 @@ log.info('Initializing Coding Team Network...') * Checks that the coding network returns a concrete engineering deliverable, * review, or plan instead of only stating that specialists are available. 
*/ -const codingTeamNetworkTaskCompleteScorer = createScorer({ +const codingTeamNetworkTaskCompleteScorer = createSupervisorPatternScorer({ id: 'coding-team-network-task-complete', name: 'Coding Team Network Task Completeness', description: 'Checks whether the coding network returned actionable engineering guidance or output.', - type: 'agent', + label: 'Coding team response', + emptyReason: 'No usable engineering response was produced.', + weakReason: 'The response is present but still needs engineering depth.', + strongReasonPrefix: 'This coding response is strong because', + signals: [ + { + label: 'it includes engineering-specific guidance', + regex: + /architecture|review|test|refactor|implementation|risk|trade-off|code|plan/i, + weight: 0.4, + }, + ], + responseLengthThresholds: [ + { min: 80, weight: 0.2 }, + { min: 160, weight: 0.1 }, + ], + minParagraphsForStructure: 999, + structureWeight: 0.15, + reasoningWeight: 0.05, + toolWeight: 0.05, }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? 
responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasEngineeringLanguage: - /architecture|review|test|refactor|implementation|risk|trade-off|code|plan/i.test( - responseText - ), - hasStructure: - /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 80) score += 0.2 - if (analysis.responseLength >= 160) score += 0.1 - if (analysis.hasEngineeringLanguage) score += 0.4 - if (analysis.hasStructure) score += 0.15 - if (analysis.hasReasoning) score += 0.05 - if (analysis.toolCount > 0) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable engineering response was produced.' - - const parts: string[] = [] - if (analysis.hasEngineeringLanguage) parts.push('it includes engineering-specific guidance') - if (analysis.hasStructure) parts.push('it is structured and actionable') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This coding response is strong because ${parts.join(', ')}.` : 'The response is present but still needs engineering depth.'}` - }) /** * Checks that the coding-team answer is execution-ready with priorities, * engineering rationale, and concrete next actions. 
*/ -const codingTeamNetworkExecutionScorer = createScorer({ +const codingTeamNetworkExecutionScorer = createSupervisorPatternScorer({ id: 'coding-team-network-execution-readiness', name: 'Coding Team Network Execution Readiness', description: 'Checks whether the coding-team response includes implementation order, validation guidance, or risk-aware next steps.', - type: 'agent', + label: 'Coding team execution response', + emptyReason: 'No usable execution-ready coding plan was produced.', + weakReason: 'The response is present but still lacks execution detail.', + strongReasonPrefix: 'This execution plan is strong because', + signals: [ + { + label: 'it clarifies ordering or phases', + regex: /priority|sequence|phase|first|then/i, + weight: 0.25, + }, + { + label: 'it names risks or constraints', + regex: /trade-off|risk|assumption|constraint/i, + weight: 0.2, + }, + { + label: 'it includes validation or next-step guidance', + regex: /test|validate|review|next step|implement/i, + weight: 0.2, + }, + ], + responseLengthThresholds: [ + { min: 160, weight: 0.2 }, + { min: 280, weight: 0.1 }, + ], + minParagraphsForStructure: 999, + structureWeight: 0.05, + reasoningWeight: 0.03, + toolWeight: 0.02, }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - 
systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasPriority: /priority|sequence|phase|first|then/i.test(responseText), - hasRisk: - /trade-off|risk|assumption|constraint/i.test(responseText), - hasValidation: - /test|validate|review|next step|implement/i.test(responseText), - hasStructure: - /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 160) score += 0.2 - if (analysis.responseLength >= 280) score += 0.1 - if (analysis.hasPriority) score += 0.25 - if (analysis.hasRisk) score += 0.2 - if (analysis.hasValidation) score += 0.2 - if (analysis.hasStructure) score += 0.05 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable execution-ready coding plan was produced.' - - const parts: string[] = [] - if (analysis.hasPriority) parts.push('it clarifies ordering or phases') - if (analysis.hasRisk) parts.push('it names risks or constraints') - if (analysis.hasValidation) parts.push('it includes validation or next-step guidance') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? 
`This execution plan is strong because ${parts.join(', ')}.` : 'The response is present but still lacks execution detail.'}` - }) export class QualityChecker implements Processor { id = 'quality-checker' diff --git a/src/mastra/networks/contentCreationNetwork.ts b/src/mastra/networks/contentCreationNetwork.ts index 41a4a6a5..18ce03e5 100644 --- a/src/mastra/networks/contentCreationNetwork.ts +++ b/src/mastra/networks/contentCreationNetwork.ts @@ -1,15 +1,4 @@ import { Agent } from '@mastra/core/agent' -import { createScorer } from '@mastra/core/evals' -import { - extractAgentResponseMessages, - extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - getReasoningFromRunOutput, - getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' import { contentStrategistAgent } from '../agents/contentStrategistAgent' import { copywriterAgent } from '../agents/copywriterAgent' @@ -18,6 +7,7 @@ import { evaluationAgent } from '../agents/evaluationAgent' import { scriptWriterAgent } from '../agents/scriptWriterAgent' import { googleAI3 } from '../config/google' import { log } from '../config/logger' +import { createSupervisorPatternScorer } from '../scorers/supervisor-scorers' import { contentReviewWorkflow } from '../workflows/content-review-workflow' import { contentStudioWorkflow } from '../workflows/content-studio-workflow' @@ -29,187 +19,73 @@ log.info('Initializing Content Creation Network...') * Checks that the content-creation network produces a usable content deliverable * or a concrete editorial plan. 
*/ -const contentCreationNetworkTaskCompleteScorer = createScorer({ - id: 'content-creation-network-task-complete', - name: 'Content Creation Network Task Completeness', - description: - 'Checks whether the network returned a substantial draft, edit, strategy, or quality review.', - type: 'agent', -}) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? 
responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasContentLanguage: - /headline|audience|draft|edit|tone|script|cta|content|strategy|quality/i.test( - responseText - ), - hasStructure: - /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 70) score += 0.2 - if (analysis.responseLength >= 140) score += 0.1 - if (analysis.hasContentLanguage) score += 0.4 - if (analysis.hasStructure) score += 0.15 - if (analysis.hasReasoning) score += 0.05 - if (analysis.toolCount > 0) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable content creation response was produced.' - - const parts: string[] = [] - if (analysis.hasContentLanguage) parts.push('it includes content or editorial language') - if (analysis.hasStructure) parts.push('it is structured for handoff') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? 
`This content response is strong because ${parts.join(', ')}.` : 'The response is present but still needs creative detail.'}` +const contentCreationNetworkTaskCompleteScorer = + createSupervisorPatternScorer({ + id: 'content-creation-network-task-complete', + name: 'Content Creation Network Task Completeness', + description: + 'Checks whether the network returned a substantial draft, edit, strategy, or quality review.', + label: 'Content creation response', + emptyReason: 'No usable content creation response was produced.', + weakReason: 'The response is present but still needs creative detail.', + strongReasonPrefix: 'This content response is strong because', + signals: [ + { + label: 'it includes content or editorial language', + regex: + /headline|audience|draft|edit|tone|script|cta|content|strategy|quality/i, + weight: 0.4, + }, + ], + responseLengthThresholds: [ + { min: 70, weight: 0.2 }, + { min: 140, weight: 0.1 }, + ], + minParagraphsForStructure: 999, + structureWeight: 0.15, + reasoningWeight: 0.05, + toolWeight: 0.05, }) /** * Checks that the content-creation answer is delivery-ready with a draft, * editorial direction, or clear revision guidance. 
*/ -const contentCreationNetworkDeliveryScorer = createScorer({ - id: 'content-creation-network-delivery-readiness', - name: 'Content Creation Network Delivery Readiness', - description: - 'Checks whether the content response is ready to publish, revise, or hand off to the next editorial step.', - type: 'agent', -}) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? 
responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasDraft: - /draft|outline|headline|script|copy/i.test(responseText), - hasVoice: - /tone|audience|positioning|voice/i.test(responseText), - hasDelivery: - /next step|revise|publish|review|qa/i.test(responseText), - hasStructure: - /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 140) score += 0.2 - if (analysis.responseLength >= 240) score += 0.1 - if (analysis.hasDraft) score += 0.25 - if (analysis.hasVoice) score += 0.2 - if (analysis.hasDelivery) score += 0.2 - if (analysis.hasStructure) score += 0.05 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable content delivery response was produced.' - - const parts: string[] = [] - if (analysis.hasDraft) parts.push('it includes draft or outline language') - if (analysis.hasVoice) parts.push('it addresses tone, audience, or positioning') - if (analysis.hasDelivery) parts.push('it includes delivery or revision guidance') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? 
`This delivery response is strong because ${parts.join(', ')}.` : 'The response is present but still needs handoff detail.'}` +const contentCreationNetworkDeliveryScorer = + createSupervisorPatternScorer({ + id: 'content-creation-network-delivery-readiness', + name: 'Content Creation Network Delivery Readiness', + description: + 'Checks whether the content response is ready to publish, revise, or hand off to the next editorial step.', + label: 'Content delivery response', + emptyReason: 'No usable content delivery response was produced.', + weakReason: 'The response is present but still needs handoff detail.', + strongReasonPrefix: 'This delivery response is strong because', + signals: [ + { + label: 'it includes draft or outline language', + regex: /draft|outline|headline|script|copy/i, + weight: 0.25, + }, + { + label: 'it addresses tone, audience, or positioning', + regex: /tone|audience|positioning|voice/i, + weight: 0.2, + }, + { + label: 'it includes delivery or revision guidance', + regex: /next step|revise|publish|review|qa/i, + weight: 0.2, + }, + ], + responseLengthThresholds: [ + { min: 140, weight: 0.2 }, + { min: 240, weight: 0.1 }, + ], + minParagraphsForStructure: 999, + structureWeight: 0.05, + reasoningWeight: 0.03, + toolWeight: 0.02, }) export const contentCreationNetwork = new Agent({ diff --git a/src/mastra/networks/dataPipelineNetwork.ts b/src/mastra/networks/dataPipelineNetwork.ts index 80610190..d8083dec 100644 --- a/src/mastra/networks/dataPipelineNetwork.ts +++ b/src/mastra/networks/dataPipelineNetwork.ts @@ -1,21 +1,11 @@ import { Agent } from '@mastra/core/agent' -import { createScorer } from '@mastra/core/evals' -import { - extractAgentResponseMessages, - extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - getReasoningFromRunOutput, - getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' import { dataExportAgent } from 
'../agents/dataExportAgent' import { dataIngestionAgent } from '../agents/dataIngestionAgent' import { dataTransformationAgent } from '../agents/dataTransformationAgent' import { reportAgent } from '../agents/reportAgent' import { log } from '../config/logger' +import { createSupervisorPatternScorer } from '../scorers/supervisor-scorers' import { confirmationTool } from '../tools/confirmation.tool' import { stockAnalysisWorkflow } from '../workflows/stock-analysis-workflow' import { LibsqlMemory } from '../config/libsql' @@ -26,151 +16,63 @@ log.info('Initializing Data Pipeline Network...') * Validates that the data pipeline network returns a concrete pipeline outcome * instead of only describing a potential handoff. */ -const dataPipelineNetworkTaskCompleteScorer = createScorer({ +const dataPipelineNetworkTaskCompleteScorer = createSupervisorPatternScorer({ id: 'data-pipeline-network-task-complete', name: 'Data Pipeline Network Task Completeness', description: 'Checks whether the network returned a concrete import, transform, export, or reporting outcome.', - type: 'agent', + label: 'Data pipeline response', + emptyReason: 'No usable data pipeline response was produced.', + weakReason: 'The response is present but still needs more report detail.', + strongReasonPrefix: 'This pipeline response is strong because', + signals: [ + { + label: 'it includes pipeline or transformation language', + regex: /csv|json|xml|transform|schema|columns|rows|report|summary|file/i, + weight: 0.4, + }, + ], + responseLengthThresholds: [{ min: 60, weight: 0.25 }], + minParagraphsForStructure: 999, + structureWeight: 0.15, + reasoningWeight: 0.05, + toolWeight: 0.05, }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = 
getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { response, responseMessages, reasoning, tools, toolCallInfos } = - results.preprocessStepResult - const responseText = (response ?? responseMessages.join('\n')).trim() - - return { - hasResponse: responseText.length > 0, - responseLength: responseText.length, - hasPipelineLanguage: - /csv|json|xml|transform|schema|columns|rows|report|summary|file/i.test( - responseText - ), - hasStructure: /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 60) score += 0.25 - if (analysis.hasPipelineLanguage) score += 0.4 - if (analysis.hasStructure) score += 0.15 - if (analysis.hasReasoning) score += 0.05 - if (analysis.toolCount > 0) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable data pipeline response was produced.' - - const parts: string[] = [] - if (analysis.hasPipelineLanguage) parts.push('it includes pipeline or transformation language') - if (analysis.hasStructure) parts.push('it is structured and readable') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? 
`This pipeline response is strong because ${parts.join(', ')}.` : 'The response is present but still needs more report detail.'}` - }) /** * Checks that the data pipeline answer is execution-ready with explicit output, * validation, or next-step guidance. */ -const dataPipelineNetworkExecutionScorer = createScorer({ - id: 'data-pipeline-network-execution-readiness', - name: 'Data Pipeline Network Execution Readiness', - description: - 'Checks whether the data-pipeline response tells the user what was produced, what changed, and what to do next.', - type: 'agent', -}) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { response, responseMessages, reasoning, tools, toolCallInfos } = - results.preprocessStepResult - const responseText = (response ?? 
responseMessages.join('\n')).trim() - - return { - hasResponse: responseText.length > 0, - responseLength: responseText.length, - hasPipelineLanguage: - /output|result|generated|exported|transformed/i.test(responseText), - categoryMatches: [ - /validation|mismatch|missing|error|warning/i.test(responseText), - /next step|download|import|review|fix/i.test(responseText), - ].filter(Boolean).length, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 120) score += 0.25 - if (analysis.hasPipelineLanguage) score += 0.35 - if (analysis.categoryMatches >= 2) score += 0.2 - if (analysis.hasReasoning) score += 0.05 - if (analysis.toolCount > 0) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable pipeline execution result was produced.' - - const parts: string[] = [] - if (analysis.hasPipelineLanguage) parts.push('it includes pipeline or transformation language') - if (analysis.categoryMatches >= 2) parts.push('it covers validation and follow-up actions') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? 
`This execution response is strong because ${parts.join(', ')}.` : 'The response is present but still needs more execution detail.'}` +const dataPipelineNetworkExecutionScorer = + createSupervisorPatternScorer({ + id: 'data-pipeline-network-execution-readiness', + name: 'Data Pipeline Network Execution Readiness', + description: + 'Checks whether the data-pipeline response tells the user what was produced, what changed, and what to do next.', + label: 'Data pipeline execution response', + emptyReason: 'No usable pipeline execution result was produced.', + weakReason: 'The response is present but still needs more execution detail.', + strongReasonPrefix: 'This execution response is strong because', + signals: [ + { + label: 'it includes pipeline or transformation language', + regex: /output|result|generated|exported|transformed/i, + weight: 0.35, + }, + { + label: 'it covers validation detail', + regex: /validation|mismatch|missing|error|warning/i, + weight: 0.1, + }, + { + label: 'it covers follow-up actions', + regex: /next step|download|import|review|fix/i, + weight: 0.1, + }, + ], + responseLengthThresholds: [{ min: 120, weight: 0.25 }], + reasoningWeight: 0.05, + toolWeight: 0.05, }) export const dataPipelineNetwork = new Agent({ diff --git a/src/mastra/networks/devopsNetwork.ts b/src/mastra/networks/devopsNetwork.ts index af4e7795..2ef85690 100644 --- a/src/mastra/networks/devopsNetwork.ts +++ b/src/mastra/networks/devopsNetwork.ts @@ -1,15 +1,4 @@ import { Agent } from '@mastra/core/agent' -import { createScorer } from '@mastra/core/evals' -import { - extractAgentResponseMessages, - extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - getReasoningFromRunOutput, - getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' import { codeArchitectAgent, codeReviewerAgent, @@ -20,6 +9,7 @@ import { evaluationAgent } from '../agents/evaluationAgent' import { 
danePackagePublisher } from '../agents/package-publisher' import { projectManagementAgent } from '../agents/projectManagementAgent' import { log } from '../config/logger' +import { createSupervisorPatternScorer } from '../scorers/supervisor-scorers' import { confirmationTool } from '../tools/confirmation.tool' import { LibsqlMemory } from '../config/libsql' log.info('Initializing DevOps Network...') @@ -28,192 +18,77 @@ log.info('Initializing DevOps Network...') * Checks that the DevOps network returns a deployable, testable, or operable * recommendation set instead of generic platform advice. */ -const devopsNetworkTaskCompleteScorer = createScorer({ +const devopsNetworkTaskCompleteScorer = createSupervisorPatternScorer({ id: 'devops-network-task-complete', name: 'DevOps Network Task Completeness', description: 'Checks whether the DevOps network returned a concrete delivery, deployment, or operations result.', - type: 'agent', + label: 'DevOps response', + emptyReason: 'No usable DevOps response was produced.', + weakReason: 'The response is present but still needs delivery detail.', + strongReasonPrefix: 'This DevOps response is strong because', + signals: [ + { + label: 'it includes DevOps-specific guidance', + regex: + /deploy|pipeline|release|monitor|infrastructure|ci\/cd|incident|rollback|test|package/i, + weight: 0.35, + }, + { + label: 'it includes operational or rollout detail', + regex: /monitor|rollback|incident|deploy|release|rollback|health|validation/i, + weight: 0.1, + }, + ], + responseLengthThresholds: [ + { min: 80, weight: 0.2 }, + { min: 160, weight: 0.1 }, + ], + minParagraphsForStructure: 999, + structureWeight: 0.15, + reasoningWeight: 0.05, + toolWeight: 0.05, }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const 
response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasDevopsLanguage: - /deploy|pipeline|release|monitor|infrastructure|ci\/cd|incident|rollback|test|package/i.test( - responseText - ), - hasStructure: - /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - hasOps: - /monitor|rollback|incident|deploy|release|rollback|health|validation/i.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 80) score += 0.2 - if (analysis.responseLength >= 160) score += 0.1 - if (analysis.hasDevopsLanguage) score += 0.35 - if (analysis.hasStructure) score += 0.15 - if (analysis.hasOps) score += 0.1 - if (analysis.hasReasoning) score += 0.05 - if (analysis.toolCount > 0) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 
'No usable DevOps response was produced.' - - const parts: string[] = [] - if (analysis.hasDevopsLanguage) parts.push('it includes DevOps-specific guidance') - if (analysis.hasStructure) parts.push('it is structured for execution') - if (analysis.hasOps) parts.push('it includes operational or rollout detail') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This DevOps response is strong because ${parts.join(', ')}.` : 'The response is present but still needs delivery detail.'}` - }) /** * Checks that the DevOps answer is operationally actionable with rollout, * validation, and risk-management guidance. */ -const devopsNetworkExecutionScorer = createScorer({ +const devopsNetworkExecutionScorer = createSupervisorPatternScorer({ id: 'devops-network-execution-readiness', name: 'DevOps Network Execution Readiness', description: 'Checks whether the DevOps response includes rollout steps, validation gates, and operational risk guidance.', - type: 'agent', + label: 'DevOps execution response', + emptyReason: 'No usable DevOps execution response was produced.', + weakReason: 'The response is present but still lacks operational detail.', + strongReasonPrefix: 'This execution plan is strong because', + signals: [ + { + label: 'it includes deployment or rollout guidance', + regex: /deploy|release|rollout|pipeline|gate/i, + weight: 0.25, + }, + { + label: 'it includes validation gates or checks', + regex: /monitor|verify|smoke test|rollback|incident/i, + weight: 0.2, + }, + { + label: 'it names owners, milestones, or risks', + regex: /next step|owner|milestone|risk/i, + weight: 0.2, + }, + ], + responseLengthThresholds: [ + { min: 160, weight: 0.2 }, + { min: 280, weight: 0.1 }, + ], + minParagraphsForStructure: 999, + structureWeight: 0.05, + reasoningWeight: 0.03, + toolWeight: 0.02, }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = 
getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasDeploy: - /deploy|release|rollout|pipeline|gate/i.test(responseText), - hasValidation: - /monitor|verify|smoke test|rollback|incident/i.test(responseText), - hasRisk: - /next step|owner|milestone|risk/i.test(responseText), - hasStructure: - /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 160) score += 0.2 - if (analysis.responseLength >= 280) score += 0.1 - if (analysis.hasDeploy) score += 0.25 - if (analysis.hasValidation) score += 0.2 - if (analysis.hasRisk) score += 0.2 - if (analysis.hasStructure) score += 0.05 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - - return Math.max(0, Math.min(1, 
score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable DevOps execution response was produced.' - - const parts: string[] = [] - if (analysis.hasDeploy) parts.push('it includes deployment or rollout guidance') - if (analysis.hasValidation) parts.push('it includes validation gates or checks') - if (analysis.hasRisk) parts.push('it names owners, milestones, or risks') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This execution plan is strong because ${parts.join(', ')}.` : 'The response is present but still lacks operational detail.'}` - }) export const devopsNetwork = new Agent({ id: 'devops-network', diff --git a/src/mastra/networks/financialIntelligenceNetwork.ts b/src/mastra/networks/financialIntelligenceNetwork.ts index f19d0311..8dd3920f 100644 --- a/src/mastra/networks/financialIntelligenceNetwork.ts +++ b/src/mastra/networks/financialIntelligenceNetwork.ts @@ -1,15 +1,4 @@ import { Agent } from '@mastra/core/agent' -import { createScorer } from '@mastra/core/evals' -import { - extractAgentResponseMessages, - extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - getReasoningFromRunOutput, - getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' import { chartDataProcessorAgent, chartGeneratorAgent, @@ -21,6 +10,7 @@ import { researchAgent } from '../agents/researchAgent' import { stockAnalysisAgent } from '../agents/stockAnalysisAgent' import { googleAI3 } from '../config/google' import { log } from '../config/logger' +import { createSupervisorPatternScorer } from '../scorers/supervisor-scorers' import { financialReportWorkflow } from '../workflows/financial-report-workflow' import { stockAnalysisWorkflow } from '../workflows/stock-analysis-workflow' import { LibsqlMemory } from '../config/libsql' @@ -31,100 +21,66 @@ log.info('Initializing Financial 
Intelligence Network...') * Checks that the financial-intelligence network returns actionable market * analysis, chart guidance, or reporting output. */ -const financialIntelligenceNetworkTaskCompleteScorer = createScorer({ - id: 'financial-intelligence-network-task-complete', - name: 'Financial Intelligence Network Task Completeness', - description: - 'Checks whether the network returned a substantive financial analysis or report.', - type: 'agent', -}) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { response, responseMessages, reasoning, tools, toolCallInfos } = - results.preprocessStepResult - const responseText = (response ?? 
responseMessages.join('\n')).trim() - - return { - hasResponse: responseText.length > 0, - responseLength: responseText.length, - hasFinanceLanguage: - /price|valuation|risk|chart|market|financial|report|trend|portfolio|disclaimer/i.test( - responseText - ), - hasStructure: /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 80) score += 0.3 - if (analysis.hasFinanceLanguage) score += 0.4 - if (analysis.hasStructure) score += 0.15 - if (analysis.hasReasoning) score += 0.05 - if (analysis.toolCount > 0) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable financial intelligence response was produced.' - - const parts: string[] = [] - if (analysis.hasFinanceLanguage) parts.push('it includes financial analysis language') - if (analysis.hasStructure) parts.push('it is structured for review') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? 
`This financial response is strong because ${parts.join(', ')}.` : 'The response is present but still needs more financial detail.'}` +const financialIntelligenceNetworkTaskCompleteScorer = + createSupervisorPatternScorer({ + id: 'financial-intelligence-network-task-complete', + name: 'Financial Intelligence Network Task Completeness', + description: + 'Checks whether the network returned a substantive financial analysis or report.', + label: 'Financial intelligence response', + emptyReason: 'No usable financial intelligence response was produced.', + weakReason: + 'The response is present but still needs more financial detail.', + strongReasonPrefix: 'This financial response is strong because', + signals: [ + { + label: 'it includes financial analysis language', + regex: + /price|valuation|risk|chart|market|financial|report|trend|portfolio|disclaimer/i, + weight: 0.4, + }, + ], + responseLengthThresholds: [{ min: 80, weight: 0.3 }], + minParagraphsForStructure: 999, + structureWeight: 0.15, + reasoningWeight: 0.05, + toolWeight: 0.05, }) /** * Checks that the financial-intelligence answer is investor-ready with thesis, * risk framing, and a clear next action or caveat. */ -const financialIntelligenceNetworkDecisionScorer = createScorer({ - id: 'financial-intelligence-network-decision-readiness', - name: 'Financial Intelligence Network Decision Readiness', - description: - 'Checks whether the financial response includes thesis, risks, and decision-ready follow-up guidance.', - type: 'agent', -}).generateScore(async context => { - const normalizedText = ( - getAssistantMessageFromRunOutput(context.run.output) ?? - String(context.run.output ?? 
'') - ).trim() - const categoryMatches = [ - /thesis|outlook|valuation|trend|support|resistance/i.test(normalizedText), - /risk|downside|volatility|uncertain|disclaimer/i.test(normalizedText), - /next step|watch|monitor|consider|review/i.test(normalizedText), - ].filter(Boolean).length - - return normalizedText.length >= 160 && categoryMatches >= 2 ? 1 : 0 -}) +const financialIntelligenceNetworkDecisionScorer = + createSupervisorPatternScorer({ + id: 'financial-intelligence-network-decision-readiness', + name: 'Financial Intelligence Network Decision Readiness', + description: + 'Checks whether the financial response includes thesis, risks, and decision-ready follow-up guidance.', + label: 'Financial decision response', + emptyReason: 'No investor-ready financial response was produced.', + weakReason: + 'The response is present but still needs clearer thesis, risk framing, or follow-up guidance.', + strongReasonPrefix: 'This financial decision response is strong because', + signals: [ + { + label: 'it includes an investment thesis or outlook', + regex: /thesis|outlook|valuation|trend|support|resistance/i, + weight: 0.25, + }, + { + label: 'it frames risks or uncertainty', + regex: /risk|downside|volatility|uncertain|disclaimer/i, + weight: 0.25, + }, + { + label: 'it includes a clear follow-up action', + regex: /next step|watch|monitor|consider|review/i, + weight: 0.25, + }, + ], + responseLengthThresholds: [{ min: 160, weight: 0.2 }], + }) export const financialIntelligenceNetwork = new Agent({ id: 'financial-intelligence-network', diff --git a/src/mastra/networks/index.ts b/src/mastra/networks/index.ts index 5efd59f8..1623dbe6 100644 --- a/src/mastra/networks/index.ts +++ b/src/mastra/networks/index.ts @@ -1,15 +1,4 @@ import { Agent } from '@mastra/core/agent' -import { createScorer } from '@mastra/core/evals' -import { - extractAgentResponseMessages, - extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - 
getReasoningFromRunOutput, - getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' import { copywriterAgent } from '../agents/copywriterAgent' import { editorAgent } from '../agents/editorAgent' import { reportAgent } from '../agents/reportAgent' @@ -27,6 +16,7 @@ import { translationAgent } from '../agents/translationAgent' import { LibsqlMemory } from '../config/libsql' import { googleAI } from '../config/google' import { log } from '../config/logger' +import { createSupervisorPatternScorer } from '../scorers/supervisor-scorers' import { weatherWorkflow } from '../workflows/weather-workflow' // CSV/Data Pipeline Networks @@ -68,101 +58,67 @@ import { * Checks that the primary network returns a useful routed answer instead of * stopping at a vague handoff explanation. */ -const agentNetworkTaskCompleteScorer = createScorer({ +const agentNetworkTaskCompleteScorer = createSupervisorPatternScorer({ id: 'primary-network-task-complete', name: 'Primary Network Task Completeness', description: 'Checks whether the primary network returned a concrete answer or actionable routed result.', - type: 'agent', + label: 'Primary network response', + emptyReason: 'No usable routed answer was produced.', + weakReason: 'The response is present but still needs more routing detail.', + strongReasonPrefix: 'This primary network response is strong because', + signals: [ + { + label: 'it includes useful routing language', + regex: + /recommend|summary|analysis|report|plan|translation|support|seo|next step/i, + weight: 0.3, + }, + { + label: 'it reads like a complete routed answer', + regex: /(?:[^.!?]+[.!?]){2,}/s, + weight: 0.2, + }, + ], + responseLengthThresholds: [{ min: 40, weight: 0.2 }], + minParagraphsForStructure: 999, + structureWeight: 0.1, + reasoningWeight: 0.05, + toolWeight: 0.05, }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - 
const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { response, responseMessages, reasoning, tools, toolCallInfos } = - results.preprocessStepResult - const responseText = (response ?? responseMessages.join('\n')).trim() - - return { - hasResponse: responseText.length > 0, - responseLength: responseText.length, - hasUsefulRouting: - /recommend|summary|analysis|report|plan|translation|support|seo|next step/i.test( - responseText - ), - sentenceCount: responseText.match(/[^.!?]+/g)?.length ?? 0, - hasStructure: /^#{1,6}\s|^[-*]\s|^\d+\.\s/m.test(responseText), - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 40) score += 0.2 - if (analysis.sentenceCount >= 2) score += 0.2 - if (analysis.hasUsefulRouting) score += 0.3 - if (analysis.hasStructure) score += 0.1 - if (analysis.hasReasoning) score += 0.05 - if (analysis.toolCount > 0) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable routed answer was produced.' 
- - const parts: string[] = [] - if (analysis.hasUsefulRouting) parts.push('it includes useful routing language') - if (analysis.hasStructure) parts.push('it is structured and readable') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This primary network response is strong because ${parts.join(', ')}.` : 'The response is present but still needs more routing detail.'}` - }) /** * Checks that the primary network answer is user-ready, concise, and ends with * a clear resolution path or next action. */ -const agentNetworkResolutionScorer = createScorer({ +const agentNetworkResolutionScorer = createSupervisorPatternScorer({ id: 'primary-network-resolution-readiness', name: 'Primary Network Resolution Readiness', description: 'Checks whether the primary network returned a direct answer with clear next steps or decision guidance.', - type: 'agent', -}).generateScore(async context => { - const normalizedText = ( - getAssistantMessageFromRunOutput(context.run.output) ?? - String(context.run.output ?? '') - ).trim() - const categoryMatches = [ - /recommend|suggest|best option|answer/i.test(normalizedText), - /next step|follow-up|if needed|you can/i.test(normalizedText), - /because|based on|given that/i.test(normalizedText), - ].filter(Boolean).length - - return normalizedText.length >= 120 && categoryMatches >= 2 ? 
1 : 0 + label: 'Primary network resolution', + emptyReason: 'No resolution-ready response was produced.', + weakReason: + 'The response is present but still needs clearer next steps or decision guidance.', + strongReasonPrefix: 'This primary network resolution is strong because', + signals: [ + { + label: 'it includes a direct answer or recommendation', + regex: /recommend|suggest|best option|answer/i, + weight: 0.25, + }, + { + label: 'it includes next-step guidance', + regex: /next step|follow-up|if needed|you can/i, + weight: 0.25, + }, + { + label: 'it explains the reasoning behind the recommendation', + regex: /because|based on|given that/i, + weight: 0.25, + }, + ], + responseLengthThresholds: [{ min: 120, weight: 0.2 }], }) export const agentNetwork = new Agent({ diff --git a/src/mastra/networks/learningNetwork.ts b/src/mastra/networks/learningNetwork.ts index bdc039cc..1a427545 100644 --- a/src/mastra/networks/learningNetwork.ts +++ b/src/mastra/networks/learningNetwork.ts @@ -1,15 +1,4 @@ import { Agent } from '@mastra/core/agent' -import { createScorer } from '@mastra/core/evals' -import { - extractAgentResponseMessages, - extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - getReasoningFromRunOutput, - getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' import { TokenLimiterProcessor } from '@mastra/core/processors' @@ -20,6 +9,7 @@ import { learningExtractionAgent } from '../agents/learningExtractionAgent' import { researchAgent } from '../agents/researchAgent' import { googleAI3 } from '../config/google' import { log } from '../config/logger' +import { createSupervisorPatternScorer } from '../scorers/supervisor-scorers' import { learningExtractionWorkflow } from '../workflows/learning-extraction-workflow' import { researchSynthesisWorkflow } from '../workflows/research-synthesis-workflow' import { LibsqlMemory } from '../config/libsql' @@ -30,188 +20,73 
@@ log.info('Initializing Learning Network...') * Checks that the learning network returns actionable learning outcomes, * knowledge-organization guidance, or research-backed educational output. */ -const learningNetworkTaskCompleteScorer = createScorer({ +const learningNetworkTaskCompleteScorer = createSupervisorPatternScorer({ id: 'learning-network-task-complete', name: 'Learning Network Task Completeness', description: 'Checks whether the learning network returned concrete learnings, indexed knowledge guidance, or educational recommendations.', - type: 'agent', + label: 'Learning response', + emptyReason: 'No usable learning network response was produced.', + weakReason: 'The response is present but still needs educational detail.', + strongReasonPrefix: 'This learning response is strong because', + signals: [ + { + label: 'it includes learning or knowledge language', + regex: + /learning|knowledge|insight|objective|resource|curriculum|index|research|assessment/i, + weight: 0.4, + }, + ], + responseLengthThresholds: [ + { min: 70, weight: 0.2 }, + { min: 140, weight: 0.1 }, + ], + minParagraphsForStructure: 999, + structureWeight: 0.15, + reasoningWeight: 0.05, + toolWeight: 0.05, }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - 
response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasLearningLanguage: - /learning|knowledge|insight|objective|resource|curriculum|index|research|assessment/i.test( - responseText - ), - hasStructure: - /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 70) score += 0.2 - if (analysis.responseLength >= 140) score += 0.1 - if (analysis.hasLearningLanguage) score += 0.4 - if (analysis.hasStructure) score += 0.15 - if (analysis.hasReasoning) score += 0.05 - if (analysis.toolCount > 0) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable learning network response was produced.' - - const parts: string[] = [] - if (analysis.hasLearningLanguage) parts.push('it includes learning or knowledge language') - if (analysis.hasStructure) parts.push('it is structured for handoff') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This learning response is strong because ${parts.join(', ')}.` : 'The response is present but still needs educational detail.'}` - }) /** * Checks that the learning answer is instructionally useful with outcomes, * structure, and a recommended next study step. 
*/ -const learningNetworkOutcomeScorer = createScorer({ +const learningNetworkOutcomeScorer = createSupervisorPatternScorer({ id: 'learning-network-outcome-readiness', name: 'Learning Network Outcome Readiness', description: 'Checks whether the learning response includes practical takeaways, learning structure, and next-study guidance.', - type: 'agent', + label: 'Learning outcome response', + emptyReason: 'No usable learning outcome response was produced.', + weakReason: + 'The response is present but still needs clearer learning direction.', + strongReasonPrefix: 'This learning outcome response is strong because', + signals: [ + { + label: 'it includes practical takeaways', + regex: /takeaway|learning|objective|insight|concept/i, + weight: 0.25, + }, + { + label: 'it lays out a learning sequence or structure', + regex: /step|sequence|curriculum|resource|practice/i, + weight: 0.2, + }, + { + label: 'it suggests the next study action', + regex: /next step|study next|review|apply/i, + weight: 0.2, + }, + ], + responseLengthThresholds: [ + { min: 140, weight: 0.2 }, + { min: 240, weight: 0.1 }, + ], + minParagraphsForStructure: 999, + structureWeight: 0.05, + reasoningWeight: 0.03, + toolWeight: 0.02, }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - 
systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasTakeaway: - /takeaway|learning|objective|insight|concept/i.test(responseText), - hasSequence: - /step|sequence|curriculum|resource|practice/i.test(responseText), - hasNextStep: - /next step|study next|review|apply/i.test(responseText), - hasStructure: - /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 140) score += 0.2 - if (analysis.responseLength >= 240) score += 0.1 - if (analysis.hasTakeaway) score += 0.25 - if (analysis.hasSequence) score += 0.2 - if (analysis.hasNextStep) score += 0.2 - if (analysis.hasStructure) score += 0.05 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable learning outcome response was produced.' - - const parts: string[] = [] - if (analysis.hasTakeaway) parts.push('it includes practical takeaways') - if (analysis.hasSequence) parts.push('it lays out a learning sequence or structure') - if (analysis.hasNextStep) parts.push('it suggests the next study action') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? 
`This learning outcome response is strong because ${parts.join(', ')}.` : 'The response is present but still needs clearer learning direction.'}` - }) export const learningNetwork = new Agent({ id: 'learning-network', diff --git a/src/mastra/networks/marketingAutomationNetwork.ts b/src/mastra/networks/marketingAutomationNetwork.ts index 180f34ec..dcd4f823 100644 --- a/src/mastra/networks/marketingAutomationNetwork.ts +++ b/src/mastra/networks/marketingAutomationNetwork.ts @@ -1,15 +1,4 @@ import { Agent } from '@mastra/core/agent' -import { createScorer } from '@mastra/core/evals' -import { - extractAgentResponseMessages, - extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - getReasoningFromRunOutput, - getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' import { contentStrategistAgent } from '../agents/contentStrategistAgent' import { copywriterAgent } from '../agents/copywriterAgent' import { researchAgent } from '../agents/researchAgent' @@ -19,6 +8,7 @@ import { translationAgent } from '../agents/translationAgent' import { googleAI3 } from '../config/google' import { log } from '../config/logger' import { LibsqlMemory } from '../config/libsql' +import { createSupervisorPatternScorer } from '../scorers/supervisor-scorers' log.info('Initializing Marketing Automation Network...') @@ -26,190 +16,79 @@ log.info('Initializing Marketing Automation Network...') * Checks that the marketing network returns a campaign-ready deliverable rather * than only describing possible marketing work. 
*/ -const marketingAutomationNetworkTaskCompleteScorer = createScorer({ - id: 'marketing-automation-network-task-complete', - name: 'Marketing Automation Network Task Completeness', - description: - 'Checks whether the marketing network returned a concrete strategy, campaign asset, or optimization plan.', - type: 'agent', -}) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? 
responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasMarketingLanguage: - /campaign|audience|channel|seo|social|conversion|cta|timeline|kpi|localization/i.test( - responseText - ), - hasStructure: /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - hasStrategy: - /strategy|plan|brief|calendar|workflow|optimi[sz]e/i.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 80) score += 0.2 - if (analysis.responseLength >= 160) score += 0.1 - if (analysis.hasMarketingLanguage) score += 0.35 - if (analysis.hasStructure) score += 0.15 - if (analysis.hasStrategy) score += 0.1 - if (analysis.hasReasoning) score += 0.05 - if (analysis.toolCount > 0) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable marketing automation response was produced.' - - const parts: string[] = [] - if (analysis.hasMarketingLanguage) parts.push('it includes campaign or channel guidance') - if (analysis.hasStructure) parts.push('it is structured for execution') - if (analysis.hasStrategy) parts.push('it includes strategy or planning detail') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? 
`This marketing response is strong because ${parts.join(', ')}.` : 'The response is present but still needs more concrete marketing detail.'}` +const marketingAutomationNetworkTaskCompleteScorer = + createSupervisorPatternScorer({ + id: 'marketing-automation-network-task-complete', + name: 'Marketing Automation Network Task Completeness', + description: + 'Checks whether the marketing network returned a concrete strategy, campaign asset, or optimization plan.', + label: 'Marketing automation response', + emptyReason: 'No usable marketing automation response was produced.', + weakReason: + 'The response is present but still needs more concrete marketing detail.', + strongReasonPrefix: 'This marketing response is strong because', + signals: [ + { + label: 'it includes campaign or channel guidance', + regex: + /campaign|audience|channel|seo|social|conversion|cta|timeline|kpi|localization/i, + weight: 0.35, + }, + { + label: 'it includes strategy or planning detail', + regex: /strategy|plan|brief|calendar|workflow|optimi[sz]e/i, + weight: 0.1, + }, + ], + responseLengthThresholds: [ + { min: 80, weight: 0.2 }, + { min: 160, weight: 0.1 }, + ], + minParagraphsForStructure: 999, + structureWeight: 0.15, + reasoningWeight: 0.05, + toolWeight: 0.05, }) /** * Checks that the marketing answer is campaign-ready with priorities, * measurement, and a channel execution path. 
*/ -const marketingAutomationNetworkExecutionScorer = createScorer({ - id: 'marketing-automation-network-execution-readiness', - name: 'Marketing Automation Network Execution Readiness', - description: - 'Checks whether the marketing answer includes execution sequencing, KPIs, and channel-ready next actions.', - type: 'agent', -}) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? 
responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasExecution: - /priority|phase|timeline|sequence|launch/i.test(responseText), - hasMetrics: - /kpi|metric|conversion|engagement|roi/i.test(responseText), - hasNextStep: - /next step|rollout|test|optimi[sz]e|publish/i.test(responseText), - hasStructure: - /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 160) score += 0.2 - if (analysis.responseLength >= 260) score += 0.1 - if (analysis.hasExecution) score += 0.25 - if (analysis.hasMetrics) score += 0.2 - if (analysis.hasNextStep) score += 0.15 - if (analysis.hasStructure) score += 0.05 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable marketing execution response was produced.' - - const parts: string[] = [] - if (analysis.hasExecution) parts.push('it provides sequencing or launch guidance') - if (analysis.hasMetrics) parts.push('it ties work to metrics') - if (analysis.hasNextStep) parts.push('it includes concrete next actions') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? 
`This execution plan is strong because ${parts.join(', ')}.` : 'The response is present but still needs execution detail.'}` +const marketingAutomationNetworkExecutionScorer = + createSupervisorPatternScorer({ + id: 'marketing-automation-network-execution-readiness', + name: 'Marketing Automation Network Execution Readiness', + description: + 'Checks whether the marketing answer includes execution sequencing, KPIs, and channel-ready next actions.', + label: 'Marketing execution response', + emptyReason: 'No usable marketing execution response was produced.', + weakReason: 'The response is present but still needs execution detail.', + strongReasonPrefix: 'This execution plan is strong because', + signals: [ + { + label: 'it provides sequencing or launch guidance', + regex: /priority|phase|timeline|sequence|launch/i, + weight: 0.25, + }, + { + label: 'it ties work to metrics', + regex: /kpi|metric|conversion|engagement|roi/i, + weight: 0.2, + }, + { + label: 'it includes concrete next actions', + regex: /next step|rollout|test|optimi[sz]e|publish/i, + weight: 0.15, + }, + ], + responseLengthThresholds: [ + { min: 160, weight: 0.2 }, + { min: 260, weight: 0.1 }, + ], + minParagraphsForStructure: 999, + structureWeight: 0.05, + reasoningWeight: 0.03, + toolWeight: 0.02, }) export const marketingAutomationNetwork = new Agent({ diff --git a/src/mastra/networks/reportGenerationNetwork.ts b/src/mastra/networks/reportGenerationNetwork.ts index 9bae6c90..38eb7369 100644 --- a/src/mastra/networks/reportGenerationNetwork.ts +++ b/src/mastra/networks/reportGenerationNetwork.ts @@ -1,25 +1,11 @@ import { Agent } from '@mastra/core/agent' -import { createScorer } from '@mastra/core/evals' -import { - extractAgentResponseMessages, - extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - getReasoningFromRunOutput, - getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' - -import { - 
TokenLimiterProcessor -} from '@mastra/core/processors' import { dataIngestionAgent } from '../agents/dataIngestionAgent' import { dataTransformationAgent } from '../agents/dataTransformationAgent' import { reportAgent } from '../agents/reportAgent' import { researchAgent } from '../agents/researchAgent' import { googleAI3 } from '../config/google' import { log } from '../config/logger' +import { createSupervisorPatternScorer } from '../scorers/supervisor-scorers' import { financialReportWorkflow } from '../workflows/financial-report-workflow' import { learningExtractionWorkflow } from '../workflows/learning-extraction-workflow' import { researchSynthesisWorkflow } from '../workflows/research-synthesis-workflow' @@ -32,185 +18,73 @@ log.info('Initializing Report Generation Network...') * Checks whether the report-generation coordinator returned a concrete report * artifact, synthesis, or actionable report plan. */ -const reportGenerationNetworkTaskCompleteScorer = createScorer({ - id: 'report-generation-network-task-complete', - name: 'Report Generation Network Task Completeness', - description: - 'Checks whether the network returned a structured report outcome with findings or next actions.', - type: 'agent', -}).generateScore(async context => { - const normalizedText = ( - getAssistantMessageFromRunOutput(context.run.output) ?? - String(context.run.output ?? '') - ).trim() - const hasReportLanguage = - /report|summary|findings|sources|analysis|recommendation|executive/i.test( - normalizedText - ) - const paragraphCount = normalizedText - .split(/\n\s*\n/) - .filter(Boolean).length - - return normalizedText.length >= 80 && (hasReportLanguage || paragraphCount >= 2) - ? 
1 - : 0 -}) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { response, responseMessages, reasoning, tools, toolCallInfos } = - results.preprocessStepResult - const responseText = (response ?? responseMessages.join('\n')).trim() - - return { - hasResponse: responseText.length > 0, - responseLength: responseText.length, - hasReportLanguage: - /report|summary|findings|sources|analysis|recommendation|executive/i.test( - responseText - ), - paragraphCount: responseText.split(/\n\s*\n/).filter(Boolean).length, - hasStructure: /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 80) score += 0.25 - if (analysis.hasReportLanguage) score += 0.35 - if (analysis.paragraphCount >= 2) score += 0.15 - if (analysis.hasStructure) score += 0.1 - if (analysis.hasReasoning) score += 0.05 - if (analysis.toolCount > 0) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable report-generation response was 
produced.' - - const parts: string[] = [] - if (analysis.hasReportLanguage) parts.push('it includes report or synthesis language') - if (analysis.hasStructure) parts.push('it is structured and readable') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This report response is strong because ${parts.join(', ')}.` : 'The response is present but still needs more report detail.'}` +const reportGenerationNetworkTaskCompleteScorer = + createSupervisorPatternScorer({ + id: 'report-generation-network-task-complete', + name: 'Report Generation Network Task Completeness', + description: + 'Checks whether the network returned a structured report outcome with findings or next actions.', + label: 'Report generation response', + emptyReason: 'No usable report-generation response was produced.', + weakReason: 'The response is present but still needs more report detail.', + strongReasonPrefix: 'This report response is strong because', + signals: [ + { + label: 'it includes report or synthesis language', + regex: + /report|summary|findings|sources|analysis|recommendation|executive/i, + weight: 0.35, + }, + { + label: 'it spans multiple report sections', + regex: /\n\s*\n/, + weight: 0.15, + }, + ], + responseLengthThresholds: [{ min: 80, weight: 0.25 }], + minParagraphsForStructure: 999, + structureWeight: 0.1, + reasoningWeight: 0.05, + toolWeight: 0.05, }) /** * Checks that the report-generation answer is synthesis-ready with findings, * evidence framing, and clear follow-up guidance. */ -const reportGenerationNetworkSynthesisScorer = createScorer({ - id: 'report-generation-network-synthesis-readiness', - name: 'Report Generation Network Synthesis Readiness', - description: - 'Checks whether the report answer contains findings, supporting rationale, and next actions.', - type: 'agent', -}).generateScore(async context => { - const normalizedText = ( - getAssistantMessageFromRunOutput(context.run.output) ?? - String(context.run.output ?? 
'') - ).trim() - const categoryMatches = [ - /finding|insight|summary|executive/i.test(normalizedText), - /source|evidence|data|based on/i.test(normalizedText), - /next step|recommend|follow-up|decision/i.test(normalizedText), - ].filter(Boolean).length - - return normalizedText.length >= 150 && categoryMatches >= 2 ? 1 : 0 -}) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { response, responseMessages, reasoning, tools, toolCallInfos } = - results.preprocessStepResult - const responseText = (response ?? 
responseMessages.join('\n')).trim() - - return { - hasResponse: responseText.length > 0, - responseLength: responseText.length, - hasReportLanguage: - /finding|insight|summary|executive/i.test(responseText), - hasEvidence: - /source|evidence|data|based on/i.test(responseText), - hasNextAction: - /next step|recommend|follow-up|decision/i.test(responseText), - hasStructure: /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 150) score += 0.25 - if (analysis.hasReportLanguage) score += 0.25 - if (analysis.hasEvidence) score += 0.2 - if (analysis.hasNextAction) score += 0.15 - if (analysis.hasStructure) score += 0.05 - if (analysis.hasReasoning) score += 0.05 - if (analysis.toolCount > 0) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable pipeline execution result was produced.' - - const parts: string[] = [] - if (analysis.hasReportLanguage) parts.push('it communicates report-ready output') - if (analysis.hasEvidence) parts.push('it includes evidence or data grounding') - if (analysis.hasNextAction) parts.push('it includes clear next actions') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? 
`This execution response is strong because ${parts.join(', ')}.` : 'The response is present but still needs more execution detail.'}` +const reportGenerationNetworkSynthesisScorer = + createSupervisorPatternScorer({ + id: 'report-generation-network-synthesis-readiness', + name: 'Report Generation Network Synthesis Readiness', + description: + 'Checks whether the report answer contains findings, supporting rationale, and next actions.', + label: 'Report synthesis response', + emptyReason: 'No usable report synthesis response was produced.', + weakReason: + 'The response is present but still needs findings, evidence, or next-action detail.', + strongReasonPrefix: 'This report synthesis response is strong because', + signals: [ + { + label: 'it communicates report-ready output', + regex: /finding|insight|summary|executive/i, + weight: 0.25, + }, + { + label: 'it includes evidence or data grounding', + regex: /source|evidence|data|based on/i, + weight: 0.2, + }, + { + label: 'it includes clear next actions', + regex: /next step|recommend|follow-up|decision/i, + weight: 0.15, + }, + ], + responseLengthThresholds: [{ min: 150, weight: 0.25 }], + minParagraphsForStructure: 999, + structureWeight: 0.05, + reasoningWeight: 0.05, + toolWeight: 0.05, }) export const reportGenerationNetwork = new Agent({ @@ -336,14 +210,6 @@ export const reportGenerationNetwork = new Agent({ researchSynthesisWorkflow, learningExtractionWorkflow, }, - outputProcessors: [ - new TokenLimiterProcessor(128000), - // new BatchPartsProcessor({ - // batchSize: 20, - // maxWaitTime: 100, - // emitOnNonText: true, - // }), - ], defaultOptions: { maxSteps: 20, delegation: { diff --git a/src/mastra/networks/researchPipelineNetwork.ts b/src/mastra/networks/researchPipelineNetwork.ts index b2449769..82433484 100644 --- a/src/mastra/networks/researchPipelineNetwork.ts +++ b/src/mastra/networks/researchPipelineNetwork.ts @@ -1,24 +1,11 @@ import { Agent } from '@mastra/core/agent' -import { createScorer } 
from '@mastra/core/evals' -import { - extractAgentResponseMessages, - extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - getReasoningFromRunOutput, - getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' -import { - TokenLimiterProcessor -} from '@mastra/core/processors' import { documentProcessingAgent } from '../agents/documentProcessingAgent' import { knowledgeIndexingAgent } from '../agents/knowledgeIndexingAgent' import { researchAgent } from '../agents/researchAgent' import { researchPaperAgent } from '../agents/researchPaperAgent' import { googleAI3 } from '../config/google' import { log } from '../config/logger' +import { createSupervisorPatternScorer } from '../scorers/supervisor-scorers' import { contentReviewWorkflow } from '../workflows/content-review-workflow' import { documentProcessingWorkflow } from '../workflows/document-processing-workflow' import { LibsqlMemory } from '../config/libsql' @@ -29,187 +16,74 @@ log.info('Initializing Research Pipeline Network...') * Checks that the research pipeline returns a concrete discovery, indexing, * retrieval, or synthesis outcome. 
*/ -const researchPipelineNetworkTaskCompleteScorer = createScorer({ - id: 'research-pipeline-network-task-complete', - name: 'Research Pipeline Network Task Completeness', - description: - 'Checks whether the research pipeline returned a substantive research or indexing result.', - type: 'agent', -}) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? 
responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasResearchLanguage: - /paper|arxiv|index|chunk|retriev|knowledge|citation|source|synthesis/i.test( - responseText - ), - hasStructure: - /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 80) score += 0.2 - if (analysis.responseLength >= 160) score += 0.1 - if (analysis.hasResearchLanguage) score += 0.4 - if (analysis.hasStructure) score += 0.15 - if (analysis.hasReasoning) score += 0.05 - if (analysis.toolCount > 0) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable research pipeline response was produced.' - - const parts: string[] = [] - if (analysis.hasResearchLanguage) parts.push('it includes research or indexing language') - if (analysis.hasStructure) parts.push('it is structured for handoff') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? 
`This research response is strong because ${parts.join(', ')}.` : 'The response is present but still lacks pipeline detail.'}` +const researchPipelineNetworkTaskCompleteScorer = + createSupervisorPatternScorer({ + id: 'research-pipeline-network-task-complete', + name: 'Research Pipeline Network Task Completeness', + description: + 'Checks whether the research pipeline returned a substantive research or indexing result.', + label: 'Research pipeline response', + emptyReason: 'No usable research pipeline response was produced.', + weakReason: 'The response is present but still lacks pipeline detail.', + strongReasonPrefix: 'This research response is strong because', + signals: [ + { + label: 'it includes research or indexing language', + regex: + /paper|arxiv|index|chunk|retriev|knowledge|citation|source|synthesis/i, + weight: 0.4, + }, + ], + responseLengthThresholds: [ + { min: 80, weight: 0.2 }, + { min: 160, weight: 0.1 }, + ], + minParagraphsForStructure: 999, + structureWeight: 0.15, + reasoningWeight: 0.05, + toolWeight: 0.05, }) /** * Checks that the research-pipeline answer communicates concrete discovery, * indexing, or retrieval value plus the next research step. 
*/ -const researchPipelineNetworkOutcomeScorer = createScorer({ - id: 'research-pipeline-network-outcome-readiness', - name: 'Research Pipeline Network Outcome Readiness', - description: - 'Checks whether the research-pipeline response communicates usable findings, artifacts, or next retrieval actions.', - type: 'agent', -}) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? 
responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasArtifacts: - /paper|document|chunk|index|knowledge base/i.test(responseText), - hasRetrieval: - /retriev|query|search|result|citation/i.test(responseText), - hasNextStep: - /next step|index next|query next|review|refine/i.test(responseText), - hasStructure: - /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 150) score += 0.2 - if (analysis.responseLength >= 260) score += 0.1 - if (analysis.hasArtifacts) score += 0.25 - if (analysis.hasRetrieval) score += 0.2 - if (analysis.hasNextStep) score += 0.15 - if (analysis.hasStructure) score += 0.05 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable research outcome response was produced.' - - const parts: string[] = [] - if (analysis.hasArtifacts) parts.push('it references papers, documents, or indexed artifacts') - if (analysis.hasRetrieval) parts.push('it includes retrieval or citation context') - if (analysis.hasNextStep) parts.push('it gives a next retrieval or review step') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? 
`This outcome response is strong because ${parts.join(', ')}.` : 'The response is present but still needs more actionable research detail.'}` +const researchPipelineNetworkOutcomeScorer = + createSupervisorPatternScorer({ + id: 'research-pipeline-network-outcome-readiness', + name: 'Research Pipeline Network Outcome Readiness', + description: + 'Checks whether the research-pipeline response communicates usable findings, artifacts, or next retrieval actions.', + label: 'Research pipeline outcome response', + emptyReason: 'No usable research outcome response was produced.', + weakReason: + 'The response is present but still needs more actionable research detail.', + strongReasonPrefix: 'This outcome response is strong because', + signals: [ + { + label: 'it references papers, documents, or indexed artifacts', + regex: /paper|document|chunk|index|knowledge base/i, + weight: 0.25, + }, + { + label: 'it includes retrieval or citation context', + regex: /retriev|query|search|result|citation/i, + weight: 0.2, + }, + { + label: 'it gives a next retrieval or review step', + regex: /next step|index next|query next|review|refine/i, + weight: 0.15, + }, + ], + responseLengthThresholds: [ + { min: 150, weight: 0.2 }, + { min: 260, weight: 0.1 }, + ], + minParagraphsForStructure: 999, + structureWeight: 0.05, + reasoningWeight: 0.03, + toolWeight: 0.02, }) /** @@ -350,14 +224,6 @@ Use for: building research knowledge bases, literature reviews, indexing academi }, // tools: { confirmationTool }, options: {}, - outputProcessors: [ - new TokenLimiterProcessor(128000), - // new BatchPartsProcessor({ - // batchSize: 20, - // maxWaitTime: 100, - // emitOnNonText: true, - // }), - ], defaultOptions: { maxSteps: 22, delegation: { diff --git a/src/mastra/networks/securityNetwork.ts b/src/mastra/networks/securityNetwork.ts index d169b731..d32961fa 100644 --- a/src/mastra/networks/securityNetwork.ts +++ b/src/mastra/networks/securityNetwork.ts @@ -1,15 +1,4 @@ import { Agent } from 
'@mastra/core/agent' -import { createScorer } from '@mastra/core/evals' -import { - extractAgentResponseMessages, - extractInputMessages, - extractToolCalls, - getAssistantMessageFromRunOutput, - getCombinedSystemPrompt, - getReasoningFromRunOutput, - getSystemMessagesFromRunInput, - getUserMessageFromRunInput, -} from '@mastra/evals/scorers/utils' import { codeReviewerAgent } from '../agents/codingAgents' import { evaluationAgent } from '../agents/evaluationAgent' import { reportAgent } from '../agents/reportAgent' @@ -17,6 +6,7 @@ import { researchAgent } from '../agents/researchAgent' import { googleAI3 } from '../config/google' import { log } from '../config/logger' import { LibsqlMemory } from '../config/libsql' +import { createSupervisorPatternScorer } from '../scorers/supervisor-scorers' log.info('Initializing Security Network...') @@ -24,188 +14,73 @@ log.info('Initializing Security Network...') * Checks that the security network returns a concrete assessment, mitigation, * or reporting outcome instead of only generic security posture language. 
*/ -const securityNetworkTaskCompleteScorer = createScorer({ +const securityNetworkTaskCompleteScorer = createSupervisorPatternScorer({ id: 'security-network-task-complete', name: 'Security Network Task Completeness', description: 'Checks whether the security network returned actionable security findings or mitigation guidance.', - type: 'agent', + label: 'Security response', + emptyReason: 'No usable security response was produced.', + weakReason: 'The response is present but still needs mitigation detail.', + strongReasonPrefix: 'This security response is strong because', + signals: [ + { + label: 'it includes security or risk language', + regex: + /security|vulnerability|risk|mitigation|compliance|incident|control|assessment|finding/i, + weight: 0.4, + }, + ], + responseLengthThresholds: [ + { min: 80, weight: 0.2 }, + { min: 160, weight: 0.1 }, + ], + minParagraphsForStructure: 999, + structureWeight: 0.15, + reasoningWeight: 0.05, + toolWeight: 0.05, }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? 
responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasSecurityLanguage: - /security|vulnerability|risk|mitigation|compliance|incident|control|assessment|finding/i.test( - responseText - ), - hasStructure: - /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 80) score += 0.2 - if (analysis.responseLength >= 160) score += 0.1 - if (analysis.hasSecurityLanguage) score += 0.4 - if (analysis.hasStructure) score += 0.15 - if (analysis.hasReasoning) score += 0.05 - if (analysis.toolCount > 0) score += 0.05 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable security response was produced.' - - const parts: string[] = [] - if (analysis.hasSecurityLanguage) parts.push('it includes security or risk language') - if (analysis.hasStructure) parts.push('it is structured for handoff') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? `This security response is strong because ${parts.join(', ')}.` : 'The response is present but still needs mitigation detail.'}` - }) /** * Checks that the security answer is remediation-ready with priority, impact, * and next mitigation actions. 
*/ -const securityNetworkRemediationScorer = createScorer({ +const securityNetworkRemediationScorer = createSupervisorPatternScorer({ id: 'security-network-remediation-readiness', name: 'Security Network Remediation Readiness', description: 'Checks whether the security response includes severity, mitigation, and follow-up guidance.', - type: 'agent', + label: 'Security remediation response', + emptyReason: 'No usable security remediation response was produced.', + weakReason: + 'The response is present but still needs concrete remediation detail.', + strongReasonPrefix: 'This remediation response is strong because', + signals: [ + { + label: 'it classifies severity or priority', + regex: /critical|high|medium|low|severity|priority/i, + weight: 0.2, + }, + { + label: 'it includes mitigation or remediation guidance', + regex: /mitigation|fix|control|remediation|contain/i, + weight: 0.25, + }, + { + label: 'it includes follow-up or monitoring steps', + regex: /next step|owner|monitor|validate|follow-up/i, + weight: 0.2, + }, + ], + responseLengthThresholds: [ + { min: 160, weight: 0.2 }, + { min: 280, weight: 0.1 }, + ], + minParagraphsForStructure: 999, + structureWeight: 0.05, + reasoningWeight: 0.03, + toolWeight: 0.02, }) - .preprocess(({ run }) => { - const userMessage = getUserMessageFromRunInput(run.input) - const inputMessages = extractInputMessages(run.input) - const systemMessages = getSystemMessagesFromRunInput(run.input) - const systemPrompt = getCombinedSystemPrompt(run.input) - const response = getAssistantMessageFromRunOutput(run.output) - const responseMessages = extractAgentResponseMessages(run.output) - const reasoning = getReasoningFromRunOutput(run.output) - const { tools, toolCallInfos } = extractToolCalls(run.output) - - return { - userMessage, - inputMessages, - systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } - }) - .analyze(({ results }) => { - const { - userMessage, - inputMessages, - 
systemMessages, - systemPrompt, - response, - responseMessages, - reasoning, - tools, - toolCallInfos, - } = results.preprocessStepResult - - const responseText = (response ?? responseMessages.join('\n')).trim() - - return { - hasUserMessage: Boolean(userMessage), - inputMessageCount: inputMessages.length, - systemMessageCount: systemMessages.length, - systemPromptLength: systemPrompt.length, - responseLength: responseText.length, - hasResponse: responseText.length > 0, - hasReasoning: Boolean(reasoning), - toolCount: tools.length, - toolCallCount: toolCallInfos.length, - hasSeverity: - /critical|high|medium|low|severity|priority/i.test(responseText), - hasMitigation: - /mitigation|fix|control|remediation|contain/i.test(responseText), - hasFollowUp: - /next step|owner|monitor|validate|follow-up/i.test(responseText), - hasStructure: - /^[-*]\s|^\d+\.\s|^#{1,6}\s/m.test(responseText), - } - }) - .generateScore(({ results }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 0 - - let score = 0 - if (analysis.responseLength >= 160) score += 0.2 - if (analysis.responseLength >= 280) score += 0.1 - if (analysis.hasSeverity) score += 0.2 - if (analysis.hasMitigation) score += 0.25 - if (analysis.hasFollowUp) score += 0.2 - if (analysis.hasStructure) score += 0.05 - if (analysis.hasReasoning) score += 0.03 - if (analysis.toolCount > 0) score += 0.02 - - return Math.max(0, Math.min(1, score)) - }) - .generateReason(({ results, score }) => { - const analysis = results.analyzeStepResult - if (!analysis?.hasResponse) return 'No usable security remediation response was produced.' - - const parts: string[] = [] - if (analysis.hasSeverity) parts.push('it classifies severity or priority') - if (analysis.hasMitigation) parts.push('it includes mitigation or remediation guidance') - if (analysis.hasFollowUp) parts.push('it includes follow-up or monitoring steps') - - return `Score: ${score.toFixed(2)}. ${parts.length > 0 ? 
`This remediation response is strong because ${parts.join(', ')}.` : 'The response is present but still needs concrete remediation detail.'}` - }) export const securityNetwork = new Agent({ id: 'security-network', diff --git a/src/mastra/processors/output-guardrails.ts b/src/mastra/processors/output-guardrails.ts new file mode 100644 index 00000000..362b5551 --- /dev/null +++ b/src/mastra/processors/output-guardrails.ts @@ -0,0 +1,52 @@ +import { createWorkflow, createStep } from '@mastra/core/workflows' +import { + ProcessorStepSchema, + PIIDetector, + ModerationProcessor, + SystemPromptScrubber, + TokenLimiterProcessor, + BatchPartsProcessor, +} from '@mastra/core/processors' + +export const outputGuardrails = createWorkflow({ + id: 'output-guardrails', + inputSchema: ProcessorStepSchema, + outputSchema: ProcessorStepSchema, + type: 'processor' +}) + // Sequential: limit tokens first, then batch stream chunks + .then(createStep(new TokenLimiterProcessor({ limit: 256000 }))) + .then(createStep(new BatchPartsProcessor( + { batchSize: 10, emitOnNonText: false }, + + ))) + // Parallel: run independent checks at the same time + .parallel([ + createStep( + new PIIDetector({ + strategy: 'redact', + model: 'openrouter/google/gemma-4-31b-it:free', + }), + ), + createStep( + new ModerationProcessor({ + strategy: 'block', + model: 'openrouter/google/gemma-4-31b-it:free', + }), + ), + ]) + // Map to the redact branch to keep its transformed messages + .map(async ({ inputData }) => { + return inputData['processor:pii-detector'] + }) + // Sequential: scrubber depends on previous redaction output + .then( + createStep( + new SystemPromptScrubber({ + strategy: 'redact', + placeholderText: '[REDACTED]', + model: 'openrouter/google/gemma-4-31b-it:free', + }), + ), + ) + .commit() \ No newline at end of file diff --git a/src/mastra/scorers/supervisor-scorers.ts b/src/mastra/scorers/supervisor-scorers.ts index c57de9dd..0602a2e5 100644 --- a/src/mastra/scorers/supervisor-scorers.ts 
+++ b/src/mastra/scorers/supervisor-scorers.ts @@ -40,6 +40,69 @@ interface SupervisorSignals extends SupervisorSnapshot { keyTermCoverage: number } +interface SupervisorPatternSignal { + label: string + regex: RegExp + weight: number +} + +interface SupervisorPatternPenalty { + label: string + regex: RegExp + weight: number +} + +interface SupervisorPatternThreshold { + min: number + weight: number +} + +interface SupervisorPatternScorerOptions { + id: string + name: string + description: string + label: string + emptyReason: string + weakReason: string + strongReasonPrefix: string + signals: SupervisorPatternSignal[] + penaltySignals?: SupervisorPatternPenalty[] + responseLengthThresholds?: SupervisorPatternThreshold[] + minParagraphsForStructure?: number + structureWeight?: number + reasoningWeight?: number + toolWeight?: number + toolFallbackWeight?: number + userMessageWeight?: number + systemMessageWeight?: number +} + +interface SupervisorPatternAnalysis { + hasUserMessage: boolean + systemMessageCount: number + responseLength: number + hasResponse: boolean + hasReasoning: boolean + toolCount: number + hasStructure: boolean + matchedSignals: string[] + matchedPenaltySignals: string[] +} + +interface SupervisorAgentPatternScorerOptions + extends Omit { + signals?: SupervisorPatternSignal[] + penaltySignals?: SupervisorPatternPenalty[] +} + +interface SupervisorStructuredOutputPatternScorerOptions + extends Omit { + requiredFields: string[] + includeMarkdownFencePenalty?: boolean + signals?: SupervisorPatternSignal[] + penaltySignals?: SupervisorPatternPenalty[] +} + const STOPWORDS = new Set([ 'a', 'an', @@ -234,6 +297,255 @@ function generateSupervisorReason( return `Score: ${score.toFixed(2)}. ${label} is strong because ${details.join(', ')}.` } +/** + * Builds a reusable scorer for supervisor/coordinator responses that can stay local + * to each agent or network file while sharing the same preprocessing and scoring core. 
+ */ +export function createSupervisorPatternScorer( + options: SupervisorPatternScorerOptions +) { + return createScorer({ + id: options.id, + name: options.name, + description: options.description, + type: 'agent', + }) + .preprocess(({ run }) => buildSupervisorSnapshot(run)) + .analyze(({ results }) => { + const snapshot = results.preprocessStepResult + const responseText = snapshot.responseText + const minParagraphsForStructure = options.minParagraphsForStructure ?? 2 + const matchedSignals = options.signals + .filter((signal) => signal.regex.test(responseText)) + .map((signal) => signal.label) + const matchedPenaltySignals = (options.penaltySignals ?? []) + .filter((signal) => signal.regex.test(responseText)) + .map((signal) => signal.label) + + return { + hasUserMessage: snapshot.userMessage.length > 0, + systemMessageCount: snapshot.systemMessageCount, + responseLength: responseText.length, + hasResponse: responseText.length > 0, + hasReasoning: snapshot.reasoningText.length > 0, + toolCount: snapshot.toolCount, + hasStructure: + STRUCTURE_REGEX.test(responseText) || + snapshot.paragraphCount >= minParagraphsForStructure, + matchedSignals, + matchedPenaltySignals, + } satisfies SupervisorPatternAnalysis + }) + .generateScore(({ results }) => { + const analysis = results.analyzeStepResult as + | SupervisorPatternAnalysis + | undefined + + if (!analysis?.hasResponse) { + return 0 + } + + let score = 0 + + if (analysis.hasUserMessage) { + score += options.userMessageWeight ?? 0 + } + + if (analysis.systemMessageCount > 0) { + score += options.systemMessageWeight ?? 0 + } + + for (const threshold of options.responseLengthThresholds ?? []) { + if (analysis.responseLength >= threshold.min) { + score += threshold.weight + } + } + + for (const signal of options.signals) { + if ( + analysis.matchedSignals.some( + (matchedSignal) => matchedSignal === signal.label + ) + ) { + score += signal.weight + } + } + + for (const signal of options.penaltySignals ?? 
[]) { + if ( + analysis.matchedPenaltySignals.some( + (matchedSignal) => matchedSignal === signal.label + ) + ) { + score -= signal.weight + } + } + + if (analysis.hasStructure) { + score += options.structureWeight ?? 0 + } + + if (analysis.hasReasoning) { + score += options.reasoningWeight ?? 0 + } + + if (analysis.toolCount > 0) { + score += options.toolWeight ?? 0 + } else { + score += options.toolFallbackWeight ?? 0 + } + + return clamp(score) + }) + .generateReason(({ results, score }) => { + const analysis = results.analyzeStepResult as + | SupervisorPatternAnalysis + | undefined + + if (!analysis?.hasResponse) { + return options.emptyReason + } + + const details = [...analysis.matchedSignals] + + if (analysis.hasStructure && (options.structureWeight ?? 0) > 0) { + details.push('it is structured for execution') + } + + if (analysis.hasReasoning && (options.reasoningWeight ?? 0) > 0) { + details.push('it includes reasoning support') + } + + if (analysis.toolCount > 0 && (options.toolWeight ?? 0) > 0) { + details.push( + `it used ${analysis.toolCount} delegation signal(s)` + ) + } + + if (analysis.matchedPenaltySignals.length > 0) { + details.push( + `it should still improve ${analysis.matchedPenaltySignals.join( + ', ' + )}` + ) + } + + if (details.length === 0) { + return `Score: ${score.toFixed(2)}. ${options.weakReason}` + } + + return `Score: ${score.toFixed(2)}. 
${options.strongReasonPrefix} ${details.join(', ')}.` + }) +} + +const SUPERVISOR_AGENT_DEFAULT_SIGNALS: SupervisorPatternSignal[] = [ + { + label: 'it opens with a direct summary or answer', + regex: SUMMARY_REGEX, + weight: 0.05, + }, + { + label: 'it includes evidence anchors or dated support', + regex: EVIDENCE_REGEX, + weight: 0.05, + }, + { + label: 'it includes next steps or follow-up guidance', + regex: NEXT_STEPS_REGEX, + weight: 0.05, + }, +] + +const SUPERVISOR_AGENT_DEFAULT_PENALTIES: SupervisorPatternPenalty[] = [ + { + label: 'raw routing chatter', + regex: ROUTING_CHATTER_REGEX, + weight: 0.1, + }, +] + +const SUPERVISOR_CHANNEL_DEFAULT_SIGNALS: SupervisorPatternSignal[] = [ + { + label: 'it stays concise enough for a public channel reply', + regex: /summary|quick take|top line|recommend/i, + weight: 0.05, + }, + { + label: 'it assigns a next action, owner, or follow-up', + regex: /owner|assignee|follow-up|next step|action item/i, + weight: 0.05, + }, +] + +/** + * Builds a supervisor-agent-specific scorer layer that preserves local domain + * signals while adding the shared answer-quality expectations we want across + * user-facing supervisor agents. + */ +export function createSupervisorAgentPatternScorer( + options: SupervisorAgentPatternScorerOptions +) { + return createSupervisorPatternScorer({ + ...options, + signals: [...SUPERVISOR_AGENT_DEFAULT_SIGNALS, ...(options.signals ?? [])], + penaltySignals: [ + ...SUPERVISOR_AGENT_DEFAULT_PENALTIES, + ...(options.penaltySignals ?? []), + ], + }) +} + +/** + * Builds a channel-oriented supervisor scorer for supervisors that need + * short, public, action-oriented replies on platforms such as Discord, Slack, + * or GitHub issues and pull requests. + */ +export function createSupervisorChannelPatternScorer( + options: SupervisorAgentPatternScorerOptions +) { + return createSupervisorAgentPatternScorer({ + ...options, + signals: [...SUPERVISOR_CHANNEL_DEFAULT_SIGNALS, ...(options.signals ?? 
[])], + }) +} + +/** + * Builds a structured-output-oriented supervisor scorer for supervisors that + * are expected to return stable fields or JSON-like payloads instead of only + * natural-language prose. + */ +export function createStructuredOutputSupervisorPatternScorer( + options: SupervisorStructuredOutputPatternScorerOptions +) { + const fieldSignals: SupervisorPatternSignal[] = options.requiredFields.map( + (fieldName) => ({ + label: `it includes the structured field "${fieldName}"`, + regex: new RegExp(`["'\`]?${fieldName}["'\`]?\\s*:`, 'i'), + weight: 0.05, + }) + ) + + const structuredPenaltySignals: SupervisorPatternPenalty[] = + options.includeMarkdownFencePenalty === false + ? [] + : [ + { + label: 'markdown fences around structured output', + regex: /```(?:json|yaml)?/i, + weight: 0.1, + }, + ] + + return createSupervisorAgentPatternScorer({ + ...options, + signals: [...fieldSignals, ...(options.signals ?? [])], + penaltySignals: [ + ...structuredPenaltySignals, + ...(options.penaltySignals ?? []), + ], + }) +} + /** * Measures whether the supervisor response stays user-facing, avoids raw routing chatter, * and presents a synthesized answer rather than exposing delegation mechanics.