diff --git a/.gitignore b/.gitignore index 9491a2f..72d1290 100644 --- a/.gitignore +++ b/.gitignore @@ -360,4 +360,10 @@ MigrationBackup/ .ionide/ # Fody - auto-generated XML schema -FodyWeavers.xsd \ No newline at end of file +FodyWeavers.xsd + +# Large model weights +weights/ + +# Git filter-branch leftovers +.git-rewrite/ \ No newline at end of file diff --git a/Build-Installer.ps1 b/Build-Installer.ps1 new file mode 100644 index 0000000..48e7493 --- /dev/null +++ b/Build-Installer.ps1 @@ -0,0 +1,137 @@ +<# +.SYNOPSIS + Build FlowVision installer using Inno Setup + +.DESCRIPTION + This script builds the FlowVision installer. It requires: + 1. Inno Setup 6 to be installed + 2. The FlowVision project to be built in Release mode + +.EXAMPLE + .\Build-Installer.ps1 +#> + +param( + [string]$InnoSetupPath = "", + [switch]$SkipBuild +) + +$ErrorActionPreference = "Stop" + +Write-Host "=========================================" -ForegroundColor Cyan +Write-Host "FlowVision Installer Builder" -ForegroundColor Cyan +Write-Host "=========================================" -ForegroundColor Cyan +Write-Host "" + +# Find Inno Setup +if (-not $InnoSetupPath) { + $locations = @( + "C:\Program Files (x86)\Inno Setup 6\ISCC.exe", + "C:\Program Files\Inno Setup 6\ISCC.exe", + "C:\Program Files (x86)\Inno Setup 5\ISCC.exe", + "C:\Program Files\Inno Setup 5\ISCC.exe" + ) + + foreach ($loc in $locations) { + if (Test-Path $loc) { + $InnoSetupPath = $loc + break + } + } +} + +if (-not $InnoSetupPath -or -not (Test-Path $InnoSetupPath)) { + Write-Host "ERROR: Inno Setup not found!" -ForegroundColor Red + Write-Host "" + Write-Host "Please install Inno Setup 6 from: https://jrsoftware.org/isdl.php" -ForegroundColor Yellow + Write-Host "" + Write-Host "Or specify the path with: .\Build-Installer.ps1 -InnoSetupPath 'C:\path\to\ISCC.exe'" -ForegroundColor Yellow + exit 1 +} + +Write-Host "Using Inno Setup: $InnoSetupPath" -ForegroundColor Green + +# Build the project first +if (-not $SkipBuild) { + Write-Host "" + Write-Host "Building FlowVision in Release mode..." -ForegroundColor Yellow + + $msbuild = "C:\Program Files\Microsoft Visual Studio\2022\Community\MSBuild\Current\Bin\MSBuild.exe" + if (-not (Test-Path $msbuild)) { + $msbuild = "C:\Program Files\Microsoft Visual Studio\2022\Professional\MSBuild\Current\Bin\MSBuild.exe" + } + if (-not (Test-Path $msbuild)) { + $msbuild = "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Current\Bin\MSBuild.exe" + } + + if (Test-Path $msbuild) { + & $msbuild "FlowVision\FlowVision.csproj" /t:Build /p:Configuration=Release /v:minimal + if ($LASTEXITCODE -ne 0) { + Write-Host "ERROR: Build failed!" -ForegroundColor Red + exit 1 + } + Write-Host "Build completed successfully!" -ForegroundColor Green + } else { + Write-Host "WARNING: MSBuild not found, skipping build step" -ForegroundColor Yellow + } +} + +# Verify required files exist +Write-Host "" +Write-Host "Verifying build output..." -ForegroundColor Yellow + +$requiredFiles = @( + "FlowVision\bin\Release\FlowVision.exe", + "FlowVision\bin\Release\onnxruntime.dll", + "FlowVision\bin\Release\tesseract50.dll", + "FlowVision\bin\Release\tessdata\eng.traineddata" +) + +$missing = @() +foreach ($file in $requiredFiles) { + if (-not (Test-Path $file)) { + $missing += $file + } +} + +if ($missing.Count -gt 0) { + Write-Host "ERROR: Missing required files:" -ForegroundColor Red + foreach ($file in $missing) { + Write-Host " - $file" -ForegroundColor Red + } + exit 1 +} + +Write-Host "All required files present!" -ForegroundColor Green + +# Create installer output directory +$installerDir = "installer" +if (-not (Test-Path $installerDir)) { + New-Item -ItemType Directory -Path $installerDir | Out-Null +} + +# Build the installer +Write-Host "" +Write-Host "Building installer..." -ForegroundColor Yellow +Write-Host "This may take several minutes due to the large model files." -ForegroundColor Gray + +$issFile = "FlowVision-Installer.iss" +& $InnoSetupPath $issFile + +if ($LASTEXITCODE -ne 0) { + Write-Host "ERROR: Installer build failed!" -ForegroundColor Red + exit 1 +} + +Write-Host "" +Write-Host "=========================================" -ForegroundColor Green +Write-Host "Installer built successfully!" -ForegroundColor Green +Write-Host "=========================================" -ForegroundColor Green +Write-Host "" + +# Show the output +$installer = Get-ChildItem "installer\*.exe" | Sort-Object LastWriteTime -Descending | Select-Object -First 1 +if ($installer) { + Write-Host "Installer: $($installer.FullName)" -ForegroundColor Cyan + Write-Host "Size: $([math]::Round($installer.Length/1MB, 2)) MB" -ForegroundColor Cyan +} diff --git a/FlowVision-Installer.iss b/FlowVision-Installer.iss new file mode 100644 index 0000000..c1118e9 --- /dev/null +++ b/FlowVision-Installer.iss @@ -0,0 +1,91 @@ +; FlowVision Installer Script for Inno Setup +; This script creates a single-file installer that includes all dependencies + +#define MyAppName "FlowVision" +#define MyAppVersion "1.0.0" +#define MyAppPublisher "FlowVision" +#define MyAppExeName "FlowVision.exe" +#define MyAppURL "https://github.com/flowvision" + +[Setup] +; Basic installer settings +AppId={{F8E2D3A4-5B6C-7D8E-9F0A-1B2C3D4E5F6A} +AppName={#MyAppName} +AppVersion={#MyAppVersion} +AppPublisher={#MyAppPublisher} +AppPublisherURL={#MyAppURL} +DefaultDirName={autopf}\{#MyAppName} +DefaultGroupName={#MyAppName} +AllowNoIcons=yes +; Output settings +OutputDir=installer +OutputBaseFilename=FlowVision-Setup-{#MyAppVersion} +; Compression - use LZMA2 for best compression of large files +Compression=lzma2/ultra64 +SolidCompression=yes +LZMAUseSeparateProcess=yes +LZMANumBlockThreads=4 +; UI settings +WizardStyle=modern +SetupIconFile=FlowVision\recursive-control-icon.ico +; Privileges +PrivilegesRequired=lowest +PrivilegesRequiredOverridesAllowed=dialog +; Architecture +ArchitecturesAllowed=x64compatible +ArchitecturesInstallIn64BitMode=x64compatible +; Disk space info +DiskSpanning=no + +[Languages] +Name: "english"; MessagesFile: "compiler:Default.isl" + +[Tasks] +Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked + +[Files] +; Main executable (contains embedded managed DLLs and detection model) +Source: "FlowVision\bin\Release\FlowVision.exe"; DestDir: "{app}"; Flags: ignoreversion +Source: "FlowVision\bin\Release\FlowVision.exe.config"; DestDir: "{app}"; Flags: ignoreversion skipifsourcedoesntexist + +; Native DLLs (required - cannot be embedded in .NET exe) +Source: "FlowVision\bin\Release\onnxruntime.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "FlowVision\bin\Release\onnxruntime_providers_shared.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "FlowVision\bin\Release\tesseract50.dll"; DestDir: "{app}"; Flags: ignoreversion +Source: "FlowVision\bin\Release\leptonica-1.82.0.dll"; DestDir: "{app}"; Flags: ignoreversion + +; Any additional DLLs that weren't embedded +Source: "FlowVision\bin\Release\*.dll"; DestDir: "{app}"; Flags: ignoreversion skipifsourcedoesntexist + +; Tesseract OCR data +Source: "FlowVision\bin\Release\tessdata\*"; DestDir: "{app}\tessdata"; Flags: ignoreversion recursesubdirs createallsubdirs + +; Florence-2 Caption Models (large ONNX files) +Source: "FlowVision\bin\Release\models\*"; DestDir: "{app}\models"; Flags: ignoreversion recursesubdirs createallsubdirs + +; Playwright browser automation files +Source: "FlowVision\bin\Release\.playwright\*"; DestDir: "{app}\.playwright"; Flags: ignoreversion recursesubdirs createallsubdirs skipifsourcedoesntexist + +; Native x64/x86 libraries +Source: "FlowVision\bin\Release\x64\*"; DestDir: "{app}\x64"; Flags: ignoreversion recursesubdirs createallsubdirs skipifsourcedoesntexist +Source: "FlowVision\bin\Release\x86\*"; DestDir: "{app}\x86"; Flags: ignoreversion recursesubdirs createallsubdirs skipifsourcedoesntexist + +; Web UI files (HTML, CSS, JS) +Source: "FlowVision\bin\Release\*.html"; DestDir: "{app}"; Flags: ignoreversion skipifsourcedoesntexist +Source: "FlowVision\bin\Release\*.css"; DestDir: "{app}"; Flags: ignoreversion skipifsourcedoesntexist +Source: "FlowVision\bin\Release\*.js"; DestDir: "{app}"; Flags: ignoreversion skipifsourcedoesntexist + +[Icons] +Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}" +Name: "{group}\{cm:UninstallProgram,{#MyAppName}}"; Filename: "{uninstallexe}" +Name: "{autodesktop}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; Tasks: desktopicon + +[Run] +Filename: "{app}\{#MyAppExeName}"; Description: "{cm:LaunchProgram,{#StringChange(MyAppName, '&', '&&')}}"; Flags: nowait postinstall skipifsilent + +[Code] +function InitializeSetup(): Boolean; +begin + Result := True; +end; + diff --git a/FlowVision.Tests/FlowVision.Tests.csproj b/FlowVision.Tests/FlowVision.Tests.csproj new file mode 100644 index 0000000..494c5f1 --- /dev/null +++ b/FlowVision.Tests/FlowVision.Tests.csproj @@ -0,0 +1,14 @@ + + + net6.0 + false + + + + + + + + + + diff --git a/FlowVision.Tests/MultiAgentActionerTests.cs b/FlowVision.Tests/MultiAgentActionerTests.cs new file mode 100644 index 0000000..3e0dff5 --- /dev/null +++ b/FlowVision.Tests/MultiAgentActionerTests.cs @@ -0,0 +1,93 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using FlowVision.lib.Classes; +using System; +using System.Reflection; +using System.IO; +using System.Collections.Generic; + +namespace FlowVision.Tests +{ + [TestClass] + public class MultiAgentActionerTests + { + private static string CallExtract(string plan) + { + var actioner = new MultiAgentActioner(null); + var method = typeof(MultiAgentActioner).GetMethod("ExtractActionableStep", BindingFlags.NonPublic | BindingFlags.Instance); + return (string)method.Invoke(actioner, new object[] { plan }); + } + + [TestMethod] + public void ExtractActionableStep_ReturnsFirstActionableLine() + { + string plan = "1. Use WindowSelectionPlugin to list windows\n2. Use ScreenCapturePlugin to capture"; + string result = CallExtract(plan); + Assert.AreEqual("Use WindowSelectionPlugin to list windows", result); + } + + [TestMethod] + public void ExtractActionableStep_SingleLineActionable() + { + string plan = "Use MousePlugin to click start button"; + string result = CallExtract(plan); + Assert.AreEqual(plan, result); + } + + [TestMethod] + public void ExtractActionableStep_ReturnsNullForNonActionable() + { + string plan = "Hello there"; + string result = CallExtract(plan); + Assert.IsNull(result); + } + + private static string ConfigPath(string name) + { + return Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), + "FlowVision", "Config", $"{name}.json"); + } + + [TestMethod] + public void PlannerPrompt_ContainsTools_WhenDynamicPromptsEnabled() + { + var config = new ToolConfig(); + config.DynamicToolPrompts = true; + config.EnableCMDPlugin = true; // ensure at least one tool + config.SaveConfig("toolsconfig"); + + var actioner = new MultiAgentActioner(null); + actioner.SetChatHistory(new List()); + + var field = typeof(MultiAgentActioner).GetField("plannerHistory", BindingFlags.NonPublic | BindingFlags.Instance); + var history = field.GetValue(actioner); + var enumerator = ((System.Collections.IEnumerable)history).GetEnumerator(); + enumerator.MoveNext(); + var first = enumerator.Current; + string content = (string)first.GetType().GetProperty("Content").GetValue(first); + + Assert.IsTrue(content.Contains("You have access to the following tools")); + } + + [TestMethod] + public void PlannerPrompt_OmitsTools_WhenDynamicPromptsDisabled() + { + var config = new ToolConfig(); + config.DynamicToolPrompts = false; + config.EnableCMDPlugin = true; + config.SaveConfig("toolsconfig"); + + var actioner = new MultiAgentActioner(null); + actioner.SetChatHistory(new List()); + + var field = typeof(MultiAgentActioner).GetField("plannerHistory", BindingFlags.NonPublic | BindingFlags.Instance); + var history = field.GetValue(actioner); + var enumerator = ((System.Collections.IEnumerable)history).GetEnumerator(); + enumerator.MoveNext(); + var first = enumerator.Current; + string content = (string)first.GetType().GetProperty("Content").GetValue(first); + + Assert.IsFalse(content.Contains("You have access to the following tools")); + } + } +} diff --git a/FlowVision.sln b/FlowVision.sln index b1bc0ba..28b6026 100644 --- a/FlowVision.sln +++ b/FlowVision.sln @@ -1,10 +1,12 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 17 -VisualStudioVersion = 17.13.35828.75 d17.13 +VisualStudioVersion = 17.13.35828.75 MinimumVisualStudioVersion = 10.0.40219.1 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FlowVision", "FlowVision\FlowVision.csproj", "{D0C80BFC-F9E8-4B7D-B3F0-C1A17C0DAB4C}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FlowVision.Tests", "FlowVision.Tests\FlowVision.Tests.csproj", "{5B7C3BAC-8534-42DC-A2B3-A5E4FEF49F1F}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -15,6 +17,10 @@ Global {D0C80BFC-F9E8-4B7D-B3F0-C1A17C0DAB4C}.Debug|Any CPU.Build.0 = Debug|Any CPU {D0C80BFC-F9E8-4B7D-B3F0-C1A17C0DAB4C}.Release|Any CPU.ActiveCfg = Release|Any CPU {D0C80BFC-F9E8-4B7D-B3F0-C1A17C0DAB4C}.Release|Any CPU.Build.0 = Release|Any CPU + {5B7C3BAC-8534-42DC-A2B3-A5E4FEF49F1F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {5B7C3BAC-8534-42DC-A2B3-A5E4FEF49F1F}.Debug|Any CPU.Build.0 = Debug|Any CPU + {5B7C3BAC-8534-42DC-A2B3-A5E4FEF49F1F}.Release|Any CPU.ActiveCfg = Release|Any CPU + {5B7C3BAC-8534-42DC-A2B3-A5E4FEF49F1F}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/FlowVision/AIProviderConfigForm.cs b/FlowVision/AIProviderConfigForm.cs new file mode 100644 index 0000000..659ffce --- /dev/null +++ b/FlowVision/AIProviderConfigForm.cs @@ -0,0 +1,789 @@ +using System; +using System.Drawing; +using System.Threading.Tasks; +using System.Windows.Forms; +using FlowVision.lib.Classes; + +namespace FlowVision +{ + /// + /// Unified AI Provider Configuration Form + /// Allows switching between Azure OpenAI, LM Studio, GitHub Models, etc. + /// + public partial class AIProviderConfigForm : Form + { + private string currentModel; + private ComboBox providerComboBox; + private Panel configPanel; + + // Azure OpenAI controls + private Panel azurePanel; + private TextBox azureDeploymentTextBox; + private TextBox azureEndpointTextBox; + private TextBox azureApiKeyTextBox; + private NumericUpDown azureTemperatureUpDown; // Added missing field + + // Gemini controls + private Panel geminiPanel; + private TextBox geminiApiKeyTextBox; + private TextBox geminiModelTextBox; + + // LM Studio controls + private Panel lmStudioPanel; + private TextBox lmStudioEndpointTextBox; + private TextBox lmStudioModelTextBox; + private NumericUpDown lmStudioTemperatureUpDown; + private NumericUpDown lmStudioMaxTokensUpDown; + + // Common controls + private Button saveButton; + private Button testButton; + private Label statusLabel; + private CheckBox enableProviderCheckBox; + + public AIProviderConfigForm(string model) + { + currentModel = model; + InitializeComponent(); + LoadConfiguration(); + } + + private void InitializeComponent() + { + this.Text = $"AI Provider Configuration - {currentModel}"; + this.Size = new Size(650, 600); + this.FormBorderStyle = FormBorderStyle.FixedDialog; + this.MaximizeBox = false; + this.MinimizeBox = false; + this.StartPosition = FormStartPosition.CenterParent; + + int y = 20; + int leftMargin = 20; + int labelWidth = 150; + int controlWidth = 450; + + // Title + var titleLabel = new Label + { + Text = "Choose Your AI Provider", + Font = new Font(this.Font.FontFamily, 14, FontStyle.Bold), + Location = new Point(leftMargin, y), + AutoSize = true + }; + this.Controls.Add(titleLabel); + y += 40; + + // Info + var infoLabel = new Label + { + Text = "Select which AI service to use for this agent. You can switch anytime!", + Location = new Point(leftMargin, y), + Size = new Size(580, 30), + ForeColor = Color.Gray + }; + this.Controls.Add(infoLabel); + y += 40; + + // Provider selector + var providerLabel = new Label + { + Text = "AI Provider:", + Location = new Point(leftMargin, y + 3), + Width = labelWidth + }; + this.Controls.Add(providerLabel); + + providerComboBox = new ComboBox + { + Location = new Point(leftMargin + labelWidth + 10, y), + Width = 250, + DropDownStyle = ComboBoxStyle.DropDownList + }; + providerComboBox.Items.AddRange(new object[] { + "Azure OpenAI (Cloud)", + "LM Studio (Local)", + "Google Gemini", + "GitHub Models (Free Tier)" + }); + providerComboBox.SelectedIndexChanged += ProviderComboBox_SelectedIndexChanged; + this.Controls.Add(providerComboBox); + y += 40; + + // Enable checkbox + enableProviderCheckBox = new CheckBox + { + Text = "Enable this provider (if unchecked, will use default Azure OpenAI)", + Location = new Point(leftMargin, y), + Width = 500, + Checked = true + }; + this.Controls.Add(enableProviderCheckBox); + y += 35; + + // Separator + var separator = new Label + { + BorderStyle = BorderStyle.Fixed3D, + Location = new Point(leftMargin, y), + Size = new Size(580, 2) + }; + this.Controls.Add(separator); + y += 15; + + // Config panel (will hold provider-specific controls) + configPanel = new Panel + { + Location = new Point(leftMargin, y), + Size = new Size(580, 300), + BorderStyle = BorderStyle.None + }; + this.Controls.Add(configPanel); + y += 310; + + // Status label + statusLabel = new Label + { + Location = new Point(leftMargin, y), + Size = new Size(580, 25), + ForeColor = Color.Blue, + Text = "" + }; + this.Controls.Add(statusLabel); + y += 30; + + // Buttons + testButton = new Button + { + Text = "Test Connection", + Location = new Point(leftMargin, y), + Width = 130, + Height = 32 + }; + testButton.Click += TestButton_Click; + this.Controls.Add(testButton); + + saveButton = new Button + { + Text = "Save Configuration", + Location = new Point(leftMargin + 450, y), + Width = 150, + Height = 32 + }; + saveButton.Click += SaveButton_Click; + this.Controls.Add(saveButton); + + var cancelButton = new Button + { + Text = "Cancel", + Location = new Point(leftMargin + 310, y), + Width = 130, + Height = 32, + DialogResult = DialogResult.Cancel + }; + cancelButton.Click += (s, e) => this.Close(); + this.Controls.Add(cancelButton); + + this.AcceptButton = saveButton; + this.CancelButton = cancelButton; + + // Create provider-specific panels + CreateAzurePanel(); + CreateLMStudioPanel(); + CreateGeminiPanel(); + } + + private void CreateGeminiPanel() + { + geminiPanel = new Panel + { + Location = new Point(0, 0), + Size = new Size(580, 300), + Visible = false + }; + + int y = 0; + int labelWidth = 150; + int controlWidth = 400; + + // API Key + var apiKeyLabel = new Label { Text = "Gemini API Key:", Location = new Point(0, y + 3), Width = labelWidth }; + geminiPanel.Controls.Add(apiKeyLabel); + geminiApiKeyTextBox = new TextBox + { + Location = new Point(labelWidth + 10, y), + Width = controlWidth, + UseSystemPasswordChar = true + }; + geminiPanel.Controls.Add(geminiApiKeyTextBox); + y += 35; + + // Model Name + var modelLabel = new Label { Text = "Model Name:", Location = new Point(0, y + 3), Width = labelWidth }; + geminiPanel.Controls.Add(modelLabel); + geminiModelTextBox = new TextBox + { + Location = new Point(labelWidth + 10, y), + Width = controlWidth, + Text = "gemini-1.5-flash" + }; + geminiPanel.Controls.Add(geminiModelTextBox); + y += 35; + + // Info + var helpLabel = new Label + { + Text = "Get your API Key from: https://aistudio.google.com/app/apikey\n" + + "Standard Endpoint: https://generativelanguage.googleapis.com/v1beta/openai/", + Location = new Point(0, y), + Size = new Size(550, 40), + ForeColor = Color.Gray + }; + geminiPanel.Controls.Add(helpLabel); + + configPanel.Controls.Add(geminiPanel); + } + + private void CreateAzurePanel() + { + azurePanel = new Panel + { + Location = new Point(0, 0), + Size = new Size(580, 300), + Visible = false + }; + + int y = 0; + int labelWidth = 150; + int controlWidth = 400; + + // Deployment Name + var deployLabel = new Label { Text = "Deployment Name:", Location = new Point(0, y + 3), Width = labelWidth }; + azurePanel.Controls.Add(deployLabel); + azureDeploymentTextBox = new TextBox { Location = new Point(labelWidth + 10, y), Width = controlWidth }; + azurePanel.Controls.Add(azureDeploymentTextBox); + y += 35; + + // Endpoint URL + var endpointLabel = new Label { Text = "Endpoint URL:", Location = new Point(0, y + 3), Width = labelWidth }; + azurePanel.Controls.Add(endpointLabel); + azureEndpointTextBox = new TextBox { Location = new Point(labelWidth + 10, y), Width = controlWidth }; + azurePanel.Controls.Add(azureEndpointTextBox); + y += 35; + + // API Key + var apiKeyLabel = new Label { Text = "API Key:", Location = new Point(0, y + 3), Width = labelWidth }; + azurePanel.Controls.Add(apiKeyLabel); + azureApiKeyTextBox = new TextBox + { + Location = new Point(labelWidth + 10, y), + Width = controlWidth, + UseSystemPasswordChar = true + }; + azurePanel.Controls.Add(azureApiKeyTextBox); + y += 35; + + // Temperature (Newly added) + var tempLabel = new Label { Text = "Temperature:", Location = new Point(0, y + 3), Width = labelWidth }; + azurePanel.Controls.Add(tempLabel); + azureTemperatureUpDown = new NumericUpDown + { + Location = new Point(labelWidth + 10, y), + Width = 100, + Minimum = 0, + Maximum = 2, + DecimalPlaces = 2, + Increment = 0.1M, + Value = 0.7M + }; + azurePanel.Controls.Add(azureTemperatureUpDown); + y += 35; + + // Help text + var helpLabel = new Label + { + Text = "Get your Azure OpenAI credentials from:\nhttps://portal.azure.com → Azure OpenAI → Keys and Endpoint", + Location = new Point(0, y), + Size = new Size(550, 40), + ForeColor = Color.Gray + }; + azurePanel.Controls.Add(helpLabel); + + configPanel.Controls.Add(azurePanel); + } + + private void CreateLMStudioPanel() + { + lmStudioPanel = new Panel + { + Location = new Point(0, 0), + Size = new Size(580, 300), + Visible = false + }; + + int y = 0; + int labelWidth = 150; + int controlWidth = 300; // Reduced width to make room for Auto-Detect button + + // Endpoint URL + var endpointLabel = new Label { Text = "Server Endpoint:", Location = new Point(0, y + 3), Width = labelWidth }; + lmStudioPanel.Controls.Add(endpointLabel); + + lmStudioEndpointTextBox = new TextBox + { + Location = new Point(labelWidth + 10, y), + Width = controlWidth, + Text = "http://localhost:1234/v1" + }; + lmStudioPanel.Controls.Add(lmStudioEndpointTextBox); + + // Auto-Detect Button + var autoDetectButton = new Button + { + Text = "Auto-Detect", + Location = new Point(labelWidth + controlWidth + 20, y - 1), + Width = 100, + Height = 23, + BackColor = Color.AliceBlue + }; + autoDetectButton.Click += AutoDetectButton_Click; + lmStudioPanel.Controls.Add(autoDetectButton); + + y += 35; + + // Model Name + var modelLabel = new Label { Text = "Model Name:", Location = new Point(0, y + 3), Width = labelWidth }; + lmStudioPanel.Controls.Add(modelLabel); + lmStudioModelTextBox = new TextBox + { + Location = new Point(labelWidth + 10, y), + Width = 400, + Text = "local-model" + }; + lmStudioPanel.Controls.Add(lmStudioModelTextBox); + y += 35; + + // Temperature + var tempLabel = new Label { Text = "Temperature:", Location = new Point(0, y + 3), Width = labelWidth }; + lmStudioPanel.Controls.Add(tempLabel); + lmStudioTemperatureUpDown = new NumericUpDown + { + Location = new Point(labelWidth + 10, y), + Width = 100, + Minimum = 1, // Fixed to 1 + Maximum = 1, // Fixed to 1 + DecimalPlaces = 1, // Fixed to 1 decimal place + Increment = 0.0M, // No increment as it's fixed + Value = 1.0M, // Fixed value + Enabled = false // Disable user input + }; + lmStudioPanel.Controls.Add(lmStudioTemperatureUpDown); + + // Add an info label for temperature + var tempInfoLabel = new Label + { + Text = "Fixed at 1.0 for LM Studio models.", + Location = new Point(labelWidth + 115, y + 3), + AutoSize = true, + ForeColor = Color.DarkGray + }; + lmStudioPanel.Controls.Add(tempInfoLabel); + y += 35; + + // Max Tokens + var tokensLabel = new Label { Text = "Max Tokens:", Location = new Point(0, y + 3), Width = labelWidth }; + lmStudioPanel.Controls.Add(tokensLabel); + lmStudioMaxTokensUpDown = new NumericUpDown + { + Location = new Point(labelWidth + 10, y), + Width = 100, + Minimum = 128, + Maximum = 1000000, // Increased to support large context models + Increment = 128, + Value = 2048 + }; + lmStudioPanel.Controls.Add(lmStudioMaxTokensUpDown); + y += 40; + + // Help text + var helpLabel = new Label + { + Text = "1. Download LM Studio from https://lmstudio.ai/\n" + + "2. Load a model (recommended: Hermes-2-Pro-Mistral-7B)\n" + + "3. Click 'Start Server' in LM Studio\n" + + "4. Make sure the endpoint matches (usually http://localhost:1234/v1)", + Location = new Point(0, y), + Size = new Size(550, 80), + ForeColor = Color.DarkGreen + }; + lmStudioPanel.Controls.Add(helpLabel); + + configPanel.Controls.Add(lmStudioPanel); + } + + private async void AutoDetectButton_Click(object sender, EventArgs e) + { + statusLabel.Text = "Searching for local AI server..."; + statusLabel.ForeColor = Color.Blue; + + string[] commonEndpoints = new[] + { + "http://localhost:1234/v1", // LM Studio default + "http://127.0.0.1:1234/v1", // LM Studio IP + "http://localhost:11434/v1", // Ollama default + "http://localhost:5000/v1", // LocalAI/Oobabooga default + "http://localhost:8080/v1" // Llama.cpp server + }; + + foreach (var endpoint in commonEndpoints) + { + try + { + var client = new OpenAI.OpenAIClient( + new System.ClientModel.ApiKeyCredential("any-key"), + new OpenAI.OpenAIClientOptions { Endpoint = new Uri(endpoint) }); + + // Just try to list models or verify endpoint validity + // Note: OpenAI client doesn't have a simple 'ping', so we assume if Uri creation works + // and we can create a client, it's a candidate. A real ping would require an API call. + // Let's try a lightweight API call to verify. + + // Create a dummy chat client to test connectivity + var chatClient = client.GetChatClient("test-model"); + var ichatClient = (Microsoft.Extensions.AI.IChatClient)(object)chatClient; + + var messages = new System.Collections.Generic.List + { + new Microsoft.Extensions.AI.ChatMessage(Microsoft.Extensions.AI.ChatRole.User, "hi") + }; + + // Set a short timeout for detection + // Note: .NET 4.8 async timeout cancellation is tricky, relying on fast failure + try { + // We don't actually wait for a full response, just seeing if connection is refused immediately + // If it hangs, it might be a valid server processing. + // For now, let's just assume the first valid URI that doesn't throw immediate connection refused is good. + await ichatClient.GetResponseAsync(messages); + } + catch (Exception ex) when (ex.Message.Contains("404") || !ex.Message.Contains("connection")) + { + // 404 means server is there but model not found - that's a success for finding the server! + // Not connection error means we reached something. + } + + lmStudioEndpointTextBox.Text = endpoint; + statusLabel.Text = $"✓ Found server at {endpoint}!"; + statusLabel.ForeColor = Color.Green; + return; + } + catch + { + // Continue to next endpoint + } + } + + statusLabel.Text = "✗ No local server found. Is LM Studio running?"; + statusLabel.ForeColor = Color.Red; + } + + private async Task TestLMStudioConnection() + { + try + { + if (!Uri.TryCreate(lmStudioEndpointTextBox.Text, UriKind.Absolute, out Uri result)) + { + throw new UriFormatException("Invalid Endpoint URL format. It should look like: http://localhost:1234/v1"); + } + + var client = new OpenAI.OpenAIClient( + new System.ClientModel.ApiKeyCredential("lm-studio"), + new OpenAI.OpenAIClientOptions { Endpoint = new Uri(lmStudioEndpointTextBox.Text) }); + + var chatClient = client.GetChatClient(lmStudioModelTextBox.Text); + + var messages = new System.Collections.Generic.List + { + new Microsoft.Extensions.AI.ChatMessage( + Microsoft.Extensions.AI.ChatRole.User, + "Say 'test' in one word") + }; + + // Cast to IChatClient - can't use AsIChatClient on OpenAI.ChatClient directly in .NET 4.8 + var ichatClient = (Microsoft.Extensions.AI.IChatClient)(object)chatClient; + var response = await ichatClient.GetResponseAsync(messages); + + statusLabel.Text = "✓ LM Studio connection successful!"; + statusLabel.ForeColor = Color.Green; + } + catch (Exception ex) + { + if (ex.Message.Contains("Connection refused") || ex.Message.Contains("No connection")) + { + statusLabel.Text = "✗ Cannot connect. Is LM Studio running with server started?"; + } + else + { + statusLabel.Text = $"✗ Connection failed: {ex.Message}"; + } + statusLabel.ForeColor = Color.Red; + } + } + + private void LoadConfiguration() + { + // Try to load existing configuration to determine current provider + var lmConfig = LMStudioConfig.LoadConfig(); + var azureConfig = APIConfig.LoadConfig(currentModel); + + // Load global tool config for syncing temperature + var toolConfig = ToolConfig.LoadConfig("toolsconfig"); + + if (lmConfig.Enabled) + { + // LM Studio is enabled + providerComboBox.SelectedIndex = 1; // LM Studio + enableProviderCheckBox.Checked = true; + + lmStudioEndpointTextBox.Text = lmConfig.EndpointURL; + lmStudioModelTextBox.Text = lmConfig.ModelName; + lmStudioTemperatureUpDown.Value = (decimal)lmConfig.Temperature; + + // Safely set max tokens + if (lmConfig.MaxTokens < lmStudioMaxTokensUpDown.Minimum) + lmStudioMaxTokensUpDown.Value = lmStudioMaxTokensUpDown.Minimum; + else if (lmConfig.MaxTokens > lmStudioMaxTokensUpDown.Maximum) + lmStudioMaxTokensUpDown.Value = lmStudioMaxTokensUpDown.Maximum; + else + lmStudioMaxTokensUpDown.Value = lmConfig.MaxTokens; + } + else if (azureConfig.ProviderType == "Gemini") + { + // Gemini is enabled + providerComboBox.SelectedIndex = 2; // Google Gemini + enableProviderCheckBox.Checked = true; + + geminiApiKeyTextBox.Text = azureConfig.APIKey; + geminiModelTextBox.Text = azureConfig.DeploymentName; + } + else + { + // Azure OpenAI (default) + providerComboBox.SelectedIndex = 0; // Azure OpenAI + enableProviderCheckBox.Checked = true; + + azureDeploymentTextBox.Text = azureConfig.DeploymentName; + azureEndpointTextBox.Text = azureConfig.EndpointURL; + azureApiKeyTextBox.Text = azureConfig.APIKey; + + // Init Azure temperature control (use ToolConfig temperature if available, else default) + azureTemperatureUpDown.Value = (decimal)toolConfig.Temperature; + } + } + + private void ProviderComboBox_SelectedIndexChanged(object sender, EventArgs e) + { + // Hide all panels + azurePanel.Visible = false; + lmStudioPanel.Visible = false; + geminiPanel.Visible = false; + + // Show selected panel + switch (providerComboBox.SelectedIndex) + { + case 0: // Azure OpenAI + azurePanel.Visible = true; + break; + case 1: // LM Studio + lmStudioPanel.Visible = true; + break; + case 2: // Google Gemini + geminiPanel.Visible = true; + break; + case 3: // GitHub Models + MessageBox.Show("GitHub Models support coming soon!\nFor now, configure as Azure OpenAI with GitHub endpoint.", + "Coming Soon", MessageBoxButtons.OK, MessageBoxIcon.Information); + providerComboBox.SelectedIndex = 0; + break; + } + } + + private async void TestButton_Click(object sender, EventArgs e) + { + statusLabel.Text = "Testing connection..."; + statusLabel.ForeColor = Color.Blue; + testButton.Enabled = false; + + try + { + if (providerComboBox.SelectedIndex == 1) // LM Studio + { + await TestLMStudioConnection(); + } + else if (providerComboBox.SelectedIndex == 2) // Gemini + { + var client = new OpenAI.OpenAIClient( + new System.ClientModel.ApiKeyCredential(geminiApiKeyTextBox.Text), + new OpenAI.OpenAIClientOptions { Endpoint = new Uri("https://generativelanguage.googleapis.com/v1beta/openai/") } + ); + var chatClient = client.GetChatClient(geminiModelTextBox.Text); + var ichatClient = (Microsoft.Extensions.AI.IChatClient)(object)chatClient; + await ichatClient.GetResponseAsync(new System.Collections.Generic.List{ + new Microsoft.Extensions.AI.ChatMessage(Microsoft.Extensions.AI.ChatRole.User, "hi") + }); + statusLabel.Text = "✓ Gemini connection successful!"; + statusLabel.ForeColor = Color.Green; + } + else // Azure OpenAI + { + await TestAzureConnection(); + } + } + catch (Exception ex) + { + statusLabel.Text = $"Test failed: {ex.Message}"; + statusLabel.ForeColor = Color.Red; + } + finally + { + testButton.Enabled = true; + } + } + + private async Task TestAzureConnection() + { + try + { + var client = new Azure.AI.OpenAI.AzureOpenAIClient( + new Uri(azureEndpointTextBox.Text), + new Azure.AzureKeyCredential(azureApiKeyTextBox.Text)); + + var chatClient = client.GetChatClient(azureDeploymentTextBox.Text); + // Use cast for .NET 4.8 compatibility + var ichatClient = (Microsoft.Extensions.AI.IChatClient)(object)chatClient; + + var messages = new System.Collections.Generic.List + { + new Microsoft.Extensions.AI.ChatMessage( + Microsoft.Extensions.AI.ChatRole.User, + "Say 'test' in one word") + }; + + var response = await ichatClient.GetResponseAsync(messages); + + statusLabel.Text = "✓ Azure OpenAI connection successful!"; + statusLabel.ForeColor = Color.Green; + } + catch (Exception ex) + { + statusLabel.Text = $"✗ Connection failed: {ex.Message}"; + statusLabel.ForeColor = Color.Red; + } + } + + private void SaveButton_Click(object sender, EventArgs e) + { + try + { + if (providerComboBox.SelectedIndex == 1) // LM Studio + { + SaveLMStudioConfig(); + } + else if (providerComboBox.SelectedIndex == 2) // Gemini + { + SaveGeminiConfig(); + } + else // Azure OpenAI + { + SaveAzureConfig(); + } + + MessageBox.Show( + $"Configuration saved successfully!\n\n" + + $"Provider: {providerComboBox.SelectedItem}\n" + + $"Status: {(enableProviderCheckBox.Checked ? "Enabled" : "Disabled")}\n\n" + + $"The new provider will be used immediately.", + "Configuration Saved", + MessageBoxButtons.OK, + MessageBoxIcon.Information); + + this.DialogResult = DialogResult.OK; + this.Close(); + } + catch (Exception ex) + { + MessageBox.Show($"Error saving configuration: {ex.Message}", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error); + } + } + + private void SaveGeminiConfig() + { + var config = new APIConfig + { + DeploymentName = geminiModelTextBox.Text, + EndpointURL = "https://generativelanguage.googleapis.com/v1beta/openai/", + APIKey = geminiApiKeyTextBox.Text, + ProviderType = "Gemini" + }; + config.SaveConfig(currentModel); + + // Disable LM Studio + var lmConfig = LMStudioConfig.LoadConfig(); + lmConfig.Enabled = false; + lmConfig.SaveConfig(); + } + + private void SaveAzureConfig() + { + var config = new APIConfig + { + DeploymentName = azureDeploymentTextBox.Text, + EndpointURL = azureEndpointTextBox.Text, + APIKey = azureApiKeyTextBox.Text, + ProviderType = "AzureOpenAI" + }; + config.SaveConfig(currentModel); + + // Disable LM Studio if Azure is being configured + var lmConfig = LMStudioConfig.LoadConfig(); + lmConfig.Enabled = false; + lmConfig.SaveConfig(); + + // Sync global tool config temperature + var toolConfig = ToolConfig.LoadConfig("toolsconfig"); + toolConfig.Temperature = (double)azureTemperatureUpDown.Value; + toolConfig.SaveConfig("toolsconfig"); + } + + private void SaveLMStudioConfig() + { + if (!Uri.TryCreate(lmStudioEndpointTextBox.Text, UriKind.Absolute, out Uri result)) + { + throw new UriFormatException("Invalid Endpoint URL format. It should look like: http://localhost:1234/v1"); + } + + var config = new LMStudioConfig + { + EndpointURL = lmStudioEndpointTextBox.Text, + ModelName = lmStudioModelTextBox.Text, + Temperature = (double)lmStudioTemperatureUpDown.Value, + MaxTokens = (int)lmStudioMaxTokensUpDown.Value, + Enabled = enableProviderCheckBox.Checked + }; + config.SaveConfig(); + + // Also save to Azure config for compatibility + var azureConfig = new APIConfig + { + DeploymentName = lmStudioModelTextBox.Text, + EndpointURL = lmStudioEndpointTextBox.Text, + APIKey = "lm-studio", + ProviderType = "LMStudio" + }; + azureConfig.SaveConfig(currentModel); + + // Sync global tool config temperature + var toolConfig = ToolConfig.LoadConfig("toolsconfig"); + toolConfig.Temperature = (double)lmStudioTemperatureUpDown.Value; + toolConfig.SaveConfig("toolsconfig"); + } + } +} diff --git a/FlowVision/ConfigForm.Designer.cs b/FlowVision/ConfigForm.Designer.cs index 8c8a065..353eecc 100644 --- a/FlowVision/ConfigForm.Designer.cs +++ b/FlowVision/ConfigForm.Designer.cs @@ -43,7 +43,7 @@ private void InitializeComponent() this.DeploymentNameLabel.Font = new System.Drawing.Font("Comic Sans MS", 14F); this.DeploymentNameLabel.Location = new System.Drawing.Point(12, 9); this.DeploymentNameLabel.Name = "DeploymentNameLabel"; - this.DeploymentNameLabel.Size = new System.Drawing.Size(170, 26); + this.DeploymentNameLabel.Size = new System.Drawing.Size(121, 26); this.DeploymentNameLabel.TabIndex = 0; this.DeploymentNameLabel.Text = "Model Name"; // @@ -115,10 +115,13 @@ private void InitializeComponent() this.Controls.Add(this.deploymentNameTextBox); this.Controls.Add(this.EndpointURLLabel); this.Controls.Add(this.DeploymentNameLabel); + this.MaximizeBox = false; this.MaximumSize = new System.Drawing.Size(700, 200); + this.MinimizeBox = false; this.MinimumSize = new System.Drawing.Size(700, 200); this.Name = "ConfigForm"; - this.Text = "ConfigForm"; + this.ShowIcon = false; + this.Text = "Model Config"; this.ResumeLayout(false); this.PerformLayout(); diff --git a/FlowVision/FlowVision.csproj b/FlowVision/FlowVision.csproj index 28aa502..82f8f5b 100644 --- a/FlowVision/FlowVision.csproj +++ b/FlowVision/FlowVision.csproj @@ -13,6 +13,7 @@ 512 true true + latest @@ -25,6 +26,7 @@ DEBUG;TRACE prompt 4 + true AnyCPU @@ -34,6 +36,10 @@ TRACE prompt 4 + true + + + recursive-control-icon.ico @@ -45,12 +51,6 @@ ..\packages\Azure.Core.1.45.0\lib\net472\Azure.Core.dll - - ..\packages\CefSharp.Common.135.0.170\lib\net462\CefSharp.dll - - - ..\packages\CefSharp.Common.135.0.170\lib\net462\CefSharp.Core.dll - ..\packages\Costura.Fody.6.0.0\lib\netstandard2.0\Costura.dll @@ -75,6 +75,9 @@ ..\packages\Microsoft.Extensions.AI.AzureAIInference.9.4.0-preview.1.25207.5\lib\net462\Microsoft.Extensions.AI.AzureAIInference.dll + + ..\packages\Microsoft.Extensions.AI.OpenAI.9.4.0-preview.1.25207.5\lib\net462\Microsoft.Extensions.AI.OpenAI.dll + ..\packages\Microsoft.Extensions.Caching.Abstractions.10.0.0-preview.3.25171.5\lib\net462\Microsoft.Extensions.Caching.Abstractions.dll @@ -93,6 +96,9 @@ ..\packages\Microsoft.Extensions.VectorData.Abstractions.9.0.0-preview.1.25161.1\lib\net462\Microsoft.Extensions.VectorData.Abstractions.dll + + ..\packages\Microsoft.Playwright.1.52.0\lib\netstandard2.0\Microsoft.Playwright.dll + ..\packages\Microsoft.SemanticKernel.1.47.0\lib\netstandard2.0\Microsoft.SemanticKernel.dll @@ -124,6 +130,10 @@ ..\packages\System.ClientModel.1.4.0-beta.4\lib\netstandard2.0\System.ClientModel.dll + + ..\packages\System.ComponentModel.Annotations.5.0.0\lib\net461\System.ComponentModel.Annotations.dll + + ..\packages\System.Diagnostics.DiagnosticSource.10.0.0-preview.3.25171.5\lib\net462\System.Diagnostics.DiagnosticSource.dll @@ -147,6 +157,7 @@ ..\packages\System.Runtime.CompilerServices.Unsafe.6.1.2\lib\net462\System.Runtime.CompilerServices.Unsafe.dll + ..\packages\System.Text.Encodings.Web.10.0.0-preview.3.25171.5\lib\net462\System.Text.Encodings.Web.dll @@ -159,6 +170,10 @@ ..\packages\System.Threading.Tasks.Extensions.4.6.3\lib\net462\System.Threading.Tasks.Extensions.dll + + ..\packages\Tesseract.5.2.0\lib\net48\Tesseract.dll + True + @@ -168,23 +183,49 @@ + + + C:\Program Files (x86)\Windows Kits\10\UnionMetadata\10.0.19041.0\Windows.winmd + + + Form + Form ConfigForm.cs + + Form + Form Form1.cs + + + + + + + + - + + + + + + + + + Form @@ -194,20 +235,23 @@ - - + + + + + UserControl + + + UserControl + + + - - Form - - - OmniParserForm.cs - @@ -217,9 +261,6 @@ Form1.cs - - OmniParserForm.cs - ResXFileCodeGenerator Resources.Designer.cs @@ -234,6 +275,7 @@ ToolConfigForm.cs Designer + SettingsSingleFileGenerator @@ -245,7 +287,9 @@ True - + + + @@ -255,8 +299,19 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/FlowVision/FodyWeavers.xml b/FlowVision/FodyWeavers.xml index 5029e70..a14d1b0 100644 --- a/FlowVision/FodyWeavers.xml +++ b/FlowVision/FodyWeavers.xml @@ -1,3 +1,25 @@ - + + + true + + + + leptonica-1.82.0 + tesseract50 + + + + leptonica-1.82.0 + tesseract50 + + + + leptonica-1.82.0 + tesseract50 + + + + $(MSBuildThisFileDirectory)..\packages\Tesseract.5.2.0\x64\ + \ No newline at end of file diff --git a/FlowVision/Form1.Designer.cs b/FlowVision/Form1.Designer.cs index 38b5339..b9bd0a0 100644 --- a/FlowVision/Form1.Designer.cs +++ b/FlowVision/Form1.Designer.cs @@ -17,6 +17,13 @@ protected override void Dispose(bool disposing) { components.Dispose(); } + + // Clean up speech recognition resources + if (speechRecognition != null) + { + speechRecognition.Dispose(); + } + base.Dispose(disposing); } @@ -32,13 +39,26 @@ private void InitializeComponent() this.filesToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.toolsToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.newChatToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.exportToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.exportToJSONToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.exportToMarkdownToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.exportDebugLogToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.copyToClipboardToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.settingsToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.agentsToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.actionerAgentToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.plannerAgentToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.coordinatorAgentToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.githubAgentToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.multiAgentModeToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.visionToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.omniParserToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); - this.lLMToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); - this.configureToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); - this.azureOpenAIToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); - this.githubToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); - this.reasonToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.viewToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.activityMonitorToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.executionVisualizerToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.helpToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.aboutToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); + this.documentationToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem(); this.mainPanel = new System.Windows.Forms.Panel(); this.menuStrip2.SuspendLayout(); this.SuspendLayout(); @@ -47,9 +67,9 @@ private void InitializeComponent() // this.menuStrip2.Items.AddRange(new System.Windows.Forms.ToolStripItem[] { this.filesToolStripMenuItem, - this.visionToolStripMenuItem, - this.lLMToolStripMenuItem, - this.reasonToolStripMenuItem}); + this.settingsToolStripMenuItem, + this.viewToolStripMenuItem, + this.helpToolStripMenuItem}); this.menuStrip2.Location = new System.Drawing.Point(0, 0); this.menuStrip2.Name = "menuStrip2"; this.menuStrip2.Size = new System.Drawing.Size(367, 24); @@ -60,7 +80,8 @@ private void InitializeComponent() // this.filesToolStripMenuItem.DropDownItems.AddRange(new System.Windows.Forms.ToolStripItem[] { this.toolsToolStripMenuItem, - this.newChatToolStripMenuItem}); + this.newChatToolStripMenuItem, + this.exportToolStripMenuItem}); this.filesToolStripMenuItem.Name = "filesToolStripMenuItem"; this.filesToolStripMenuItem.Size = new System.Drawing.Size(37, 20); this.filesToolStripMenuItem.Text = "File"; @@ -68,69 +89,176 @@ private void InitializeComponent() // toolsToolStripMenuItem // this.toolsToolStripMenuItem.Name = "toolsToolStripMenuItem"; - this.toolsToolStripMenuItem.Size = new System.Drawing.Size(126, 22); + this.toolsToolStripMenuItem.Size = new System.Drawing.Size(180, 22); this.toolsToolStripMenuItem.Text = "Tools"; this.toolsToolStripMenuItem.Click += new System.EventHandler(this.toolsToolStripMenuItem_Click); // // newChatToolStripMenuItem // this.newChatToolStripMenuItem.Name = "newChatToolStripMenuItem"; - this.newChatToolStripMenuItem.Size = new System.Drawing.Size(126, 22); + this.newChatToolStripMenuItem.Size = new System.Drawing.Size(180, 22); this.newChatToolStripMenuItem.Text = "New Chat"; this.newChatToolStripMenuItem.Click += new System.EventHandler(this.newChatToolStripMenuItem_Click); // + // exportToolStripMenuItem + // + this.exportToolStripMenuItem.DropDownItems.AddRange(new System.Windows.Forms.ToolStripItem[] { + this.exportToJSONToolStripMenuItem, + this.exportToMarkdownToolStripMenuItem, + this.exportDebugLogToolStripMenuItem, + this.copyToClipboardToolStripMenuItem}); + this.exportToolStripMenuItem.Name = "exportToolStripMenuItem"; + this.exportToolStripMenuItem.Size = new System.Drawing.Size(180, 22); + this.exportToolStripMenuItem.Text = "Export Chat"; + // + // exportToJSONToolStripMenuItem + // + this.exportToJSONToolStripMenuItem.Name = "exportToJSONToolStripMenuItem"; + this.exportToJSONToolStripMenuItem.Size = new System.Drawing.Size(220, 22); + this.exportToJSONToolStripMenuItem.Text = "Export to JSON"; + this.exportToJSONToolStripMenuItem.Click += new System.EventHandler(this.exportToJSONToolStripMenuItem_Click); + // + // exportToMarkdownToolStripMenuItem + // + this.exportToMarkdownToolStripMenuItem.Name = "exportToMarkdownToolStripMenuItem"; + this.exportToMarkdownToolStripMenuItem.Size = new System.Drawing.Size(220, 22); + this.exportToMarkdownToolStripMenuItem.Text = "Export to Markdown"; + this.exportToMarkdownToolStripMenuItem.Click += new System.EventHandler(this.exportToMarkdownToolStripMenuItem_Click); + // + // exportDebugLogToolStripMenuItem + // + this.exportDebugLogToolStripMenuItem.Name = "exportDebugLogToolStripMenuItem"; + this.exportDebugLogToolStripMenuItem.Size = new System.Drawing.Size(220, 22); + this.exportDebugLogToolStripMenuItem.Text = "Export Debug Log (with Tools)"; + this.exportDebugLogToolStripMenuItem.Click += new System.EventHandler(this.exportDebugLogToolStripMenuItem_Click); + // + // copyToClipboardToolStripMenuItem + // + this.copyToClipboardToolStripMenuItem.Name = "copyToClipboardToolStripMenuItem"; + this.copyToClipboardToolStripMenuItem.Size = new System.Drawing.Size(220, 22); + this.copyToClipboardToolStripMenuItem.Text = "Copy to Clipboard"; + this.copyToClipboardToolStripMenuItem.Click += new System.EventHandler(this.copyToClipboardToolStripMenuItem_Click); + // // visionToolStripMenuItem // this.visionToolStripMenuItem.DropDownItems.AddRange(new System.Windows.Forms.ToolStripItem[] { this.omniParserToolStripMenuItem}); this.visionToolStripMenuItem.Name = "visionToolStripMenuItem"; - this.visionToolStripMenuItem.Size = new System.Drawing.Size(51, 20); - this.visionToolStripMenuItem.Text = "Vision"; + this.visionToolStripMenuItem.Size = new System.Drawing.Size(180, 22); + this.visionToolStripMenuItem.Text = "🔭 Vision Tools"; // // omniParserToolStripMenuItem // this.omniParserToolStripMenuItem.Name = "omniParserToolStripMenuItem"; - this.omniParserToolStripMenuItem.Size = new System.Drawing.Size(136, 22); - this.omniParserToolStripMenuItem.Text = "OmniParser"; + this.omniParserToolStripMenuItem.Size = new System.Drawing.Size(200, 22); + this.omniParserToolStripMenuItem.Text = "📸 OmniParser Config"; this.omniParserToolStripMenuItem.Click += new System.EventHandler(this.omniParserToolStripMenuItem_Click); // - // lLMToolStripMenuItem + // settingsToolStripMenuItem + // + this.settingsToolStripMenuItem.DropDownItems.AddRange(new System.Windows.Forms.ToolStripItem[] { + this.toolsToolStripMenuItem, + this.agentsToolStripMenuItem, + this.visionToolStripMenuItem, + this.multiAgentModeToolStripMenuItem}); + this.settingsToolStripMenuItem.Name = "settingsToolStripMenuItem"; + this.settingsToolStripMenuItem.Size = new System.Drawing.Size(61, 20); + this.settingsToolStripMenuItem.Text = "⚙️ Setup"; + // + // agentsToolStripMenuItem + // + this.agentsToolStripMenuItem.DropDownItems.AddRange(new System.Windows.Forms.ToolStripItem[] { + this.actionerAgentToolStripMenuItem, + this.plannerAgentToolStripMenuItem, + this.coordinatorAgentToolStripMenuItem, + this.githubAgentToolStripMenuItem}); + this.agentsToolStripMenuItem.Name = "agentsToolStripMenuItem"; + this.agentsToolStripMenuItem.Size = new System.Drawing.Size(180, 22); + this.agentsToolStripMenuItem.Text = "🤖 AI Agents"; + // + // actionerAgentToolStripMenuItem + // + this.actionerAgentToolStripMenuItem.Name = "actionerAgentToolStripMenuItem"; + this.actionerAgentToolStripMenuItem.Size = new System.Drawing.Size(250, 22); + this.actionerAgentToolStripMenuItem.Text = "⚡ Actioner Agent (Primary)"; + this.actionerAgentToolStripMenuItem.Click += new System.EventHandler(this.actionerAgentToolStripMenuItem_Click); + // + // plannerAgentToolStripMenuItem + // + this.plannerAgentToolStripMenuItem.Name = "plannerAgentToolStripMenuItem"; + this.plannerAgentToolStripMenuItem.Size = new System.Drawing.Size(250, 22); + this.plannerAgentToolStripMenuItem.Text = "📋 Planner Agent"; + this.plannerAgentToolStripMenuItem.Click += new System.EventHandler(this.plannerAgentToolStripMenuItem_Click); + // + // coordinatorAgentToolStripMenuItem + // + this.coordinatorAgentToolStripMenuItem.Name = "coordinatorAgentToolStripMenuItem"; + this.coordinatorAgentToolStripMenuItem.Size = new System.Drawing.Size(250, 22); + this.coordinatorAgentToolStripMenuItem.Text = "🎯 Coordinator Agent"; + this.coordinatorAgentToolStripMenuItem.Click += new System.EventHandler(this.coordinatorAgentToolStripMenuItem_Click); + // + // githubAgentToolStripMenuItem + // + this.githubAgentToolStripMenuItem.Name = "githubAgentToolStripMenuItem"; + this.githubAgentToolStripMenuItem.Size = new System.Drawing.Size(250, 22); + this.githubAgentToolStripMenuItem.Text = "🐙 GitHub Agent"; + this.githubAgentToolStripMenuItem.Click += new System.EventHandler(this.githubAgentToolStripMenuItem_Click); + // + // multiAgentModeToolStripMenuItem + // + this.multiAgentModeToolStripMenuItem.CheckOnClick = true; + this.multiAgentModeToolStripMenuItem.Name = "multiAgentModeToolStripMenuItem"; + this.multiAgentModeToolStripMenuItem.Size = new System.Drawing.Size(180, 22); + this.multiAgentModeToolStripMenuItem.Text = "🔀 Multi-Agent Mode"; + this.multiAgentModeToolStripMenuItem.Click += new System.EventHandler(this.multiAgentModeToolStripMenuItem_Click); + // + // viewToolStripMenuItem + // + this.viewToolStripMenuItem.DropDownItems.AddRange(new System.Windows.Forms.ToolStripItem[] { + this.activityMonitorToolStripMenuItem, + this.executionVisualizerToolStripMenuItem}); + this.viewToolStripMenuItem.Name = "viewToolStripMenuItem"; + this.viewToolStripMenuItem.Size = new System.Drawing.Size(44, 20); + this.viewToolStripMenuItem.Text = "👁️ View"; + // + // activityMonitorToolStripMenuItem // - this.lLMToolStripMenuItem.DropDownItems.AddRange(new System.Windows.Forms.ToolStripItem[] { - this.configureToolStripMenuItem}); - this.lLMToolStripMenuItem.Name = "lLMToolStripMenuItem"; - this.lLMToolStripMenuItem.Size = new System.Drawing.Size(42, 20); - this.lLMToolStripMenuItem.Text = "LLM"; + this.activityMonitorToolStripMenuItem.CheckOnClick = true; + this.activityMonitorToolStripMenuItem.Name = "activityMonitorToolStripMenuItem"; + this.activityMonitorToolStripMenuItem.Size = new System.Drawing.Size(200, 22); + this.activityMonitorToolStripMenuItem.Text = "📊 Activity Monitor"; + this.activityMonitorToolStripMenuItem.Click += new System.EventHandler(this.activityMonitorToolStripMenuItem_Click); // - // configureToolStripMenuItem + // executionVisualizerToolStripMenuItem // - this.configureToolStripMenuItem.DropDownItems.AddRange(new System.Windows.Forms.ToolStripItem[] { - this.azureOpenAIToolStripMenuItem, - this.githubToolStripMenuItem}); - this.configureToolStripMenuItem.Name = "configureToolStripMenuItem"; - this.configureToolStripMenuItem.Size = new System.Drawing.Size(104, 22); - this.configureToolStripMenuItem.Text = "Setup"; - this.configureToolStripMenuItem.Click += new System.EventHandler(this.configureToolStripMenuItem_Click); + this.executionVisualizerToolStripMenuItem.CheckOnClick = true; + this.executionVisualizerToolStripMenuItem.Name = "executionVisualizerToolStripMenuItem"; + this.executionVisualizerToolStripMenuItem.Size = new System.Drawing.Size(200, 22); + this.executionVisualizerToolStripMenuItem.Text = "🎯 Execution Visualizer"; + this.executionVisualizerToolStripMenuItem.Click += new System.EventHandler(this.executionVisualizerToolStripMenuItem_Click); // - // azureOpenAIToolStripMenuItem + // helpToolStripMenuItem // - this.azureOpenAIToolStripMenuItem.Name = "azureOpenAIToolStripMenuItem"; - this.azureOpenAIToolStripMenuItem.Size = new System.Drawing.Size(147, 22); - this.azureOpenAIToolStripMenuItem.Text = "Azure OpenAI"; - this.azureOpenAIToolStripMenuItem.Click += new System.EventHandler(this.azureOpenAIToolStripMenuItem_Click); + this.helpToolStripMenuItem.DropDownItems.AddRange(new System.Windows.Forms.ToolStripItem[] { + this.aboutToolStripMenuItem, + this.documentationToolStripMenuItem}); + this.helpToolStripMenuItem.Name = "helpToolStripMenuItem"; + this.helpToolStripMenuItem.Size = new System.Drawing.Size(44, 20); + this.helpToolStripMenuItem.Text = "❓ Help"; // - // githubToolStripMenuItem + // aboutToolStripMenuItem // - this.githubToolStripMenuItem.Name = "githubToolStripMenuItem"; - this.githubToolStripMenuItem.Size = new System.Drawing.Size(147, 22); - this.githubToolStripMenuItem.Text = "Github"; - this.githubToolStripMenuItem.Click += new System.EventHandler(this.githubToolStripMenuItem_Click); + this.aboutToolStripMenuItem.Name = "aboutToolStripMenuItem"; + this.aboutToolStripMenuItem.Size = new System.Drawing.Size(200, 22); + this.aboutToolStripMenuItem.Text = "ℹ️ About"; + this.aboutToolStripMenuItem.Click += new System.EventHandler(this.aboutToolStripMenuItem_Click); // - // reasonToolStripMenuItem + // documentationToolStripMenuItem // - this.reasonToolStripMenuItem.Name = "reasonToolStripMenuItem"; - this.reasonToolStripMenuItem.Size = new System.Drawing.Size(57, 20); - this.reasonToolStripMenuItem.Text = "Reason"; + this.documentationToolStripMenuItem.Name = "documentationToolStripMenuItem"; + this.documentationToolStripMenuItem.Size = new System.Drawing.Size(200, 22); + this.documentationToolStripMenuItem.Text = "📚 Documentation"; + this.documentationToolStripMenuItem.Click += new System.EventHandler(this.documentationToolStripMenuItem_Click); // // mainPanel // @@ -161,16 +289,29 @@ private void InitializeComponent() #endregion private System.Windows.Forms.MenuStrip menuStrip2; private System.Windows.Forms.ToolStripMenuItem filesToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem settingsToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem agentsToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem actionerAgentToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem plannerAgentToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem coordinatorAgentToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem githubAgentToolStripMenuItem; private System.Windows.Forms.ToolStripMenuItem visionToolStripMenuItem; - private System.Windows.Forms.ToolStripMenuItem lLMToolStripMenuItem; - private System.Windows.Forms.ToolStripMenuItem reasonToolStripMenuItem; - private System.Windows.Forms.ToolStripMenuItem configureToolStripMenuItem; - private System.Windows.Forms.ToolStripMenuItem azureOpenAIToolStripMenuItem; private System.Windows.Forms.Panel mainPanel; - private System.Windows.Forms.ToolStripMenuItem githubToolStripMenuItem; private System.Windows.Forms.ToolStripMenuItem omniParserToolStripMenuItem; private System.Windows.Forms.ToolStripMenuItem toolsToolStripMenuItem; private System.Windows.Forms.ToolStripMenuItem newChatToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem exportToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem exportToJSONToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem exportToMarkdownToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem exportDebugLogToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem copyToClipboardToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem multiAgentModeToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem viewToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem activityMonitorToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem executionVisualizerToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem helpToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem aboutToolStripMenuItem; + private System.Windows.Forms.ToolStripMenuItem documentationToolStripMenuItem; } } diff --git a/FlowVision/Form1.cs b/FlowVision/Form1.cs index f2849bd..cbe789e 100644 --- a/FlowVision/Form1.cs +++ b/FlowVision/Form1.cs @@ -8,6 +8,10 @@ using System.Threading.Tasks; using System.Windows.Forms; using FlowVision.lib.Classes; +using Microsoft.Extensions.AI; +// Add System.Speech namespace +using System.Speech.Recognition; +using FlowVision.lib.UI; namespace FlowVision { @@ -16,7 +20,12 @@ public partial class Form1 : Form private FlowLayoutPanel messagesPanel; private RichTextBox userInputTextBox; private Button sendButton; - private List chatHistory = new List(); + private List chatHistory = new List(); + private Button microphoneButton; // New microphone button + // Speech recognition components + private SpeechRecognitionService speechRecognition; + private bool isListening = false; + private ThemeManager _themeManager; // Add ThemeManager field declaration // Add a delegate for handling plugin output messages public delegate void PluginOutputHandler(string message); @@ -26,29 +35,74 @@ public Form1() InitializeComponent(); } - private void toolStripMenuItem1_Click(object sender, EventArgs e) + private void azureOpenAIToolStripMenuItem_Click(object sender, EventArgs e) { - + // Use the new unified AI Provider config form + if (Application.OpenForms.OfType().Count() == 1) + { + Application.OpenForms.OfType().First().BringToFront(); + } + else + { + AIProviderConfigForm configForm = new AIProviderConfigForm("actioner"); + configForm.ShowDialog(); + } } - private void configureToolStripMenuItem_Click(object sender, EventArgs e) + private void ApplyTheme(string themeName = null) // Fix ApplyTheme method to handle parameter { + // Initialize ThemeManager if it doesn't exist yet + if (_themeManager == null) + { + _themeManager = new ThemeManager(); + } - } + // Use provided theme name or current theme from ThemeManager + string theme = themeName ?? _themeManager.CurrentTheme; - private void azureOpenAIToolStripMenuItem_Click(object sender, EventArgs e) - { - // Check if the config form is already open - if (Application.OpenForms.OfType().Count() == 1) + if (theme == "Dark") { - // If it is, bring it to the front - Application.OpenForms.OfType().First().BringToFront(); + ApplyDarkTheme(); } else { - // If it isn't, create a new instance of the form - ConfigForm configForm = new ConfigForm("actioner"); - configForm.Show(); + ApplyLightTheme(); + } + + // Apply theme to all dynamically created controls + _themeManager.ApplyThemeToControls(this); + } + + private void ApplyLightTheme() + { + // Let ThemeManager handle applying the light theme to all controls + _themeManager.ApplyThemeToControls(this); + + // Update status indicator + UpdateStatusIndicators(); + } + + private void ApplyDarkTheme() + { + // Let ThemeManager handle applying the dark theme to all controls + _themeManager.ApplyThemeToControls(this); + + // Apply specific overrides for any controls that need special handling + + // Update status indicator + UpdateStatusIndicators(); + } + + private void UpdateStatusIndicators() // Add missing UpdateStatusIndicators method + { + // Update any status indicators based on current state + // This can be extended later if more indicators are added + + // Update microphone button appearance based on isListening state + if (microphoneButton != null) + { + microphoneButton.Text = isListening ? "⏹️" : "🎤"; + microphoneButton.BackColor = isListening ? Color.Red : SystemColors.Control; } } @@ -56,13 +110,27 @@ private void Form1_Load(object sender, EventArgs e) { // Check if tools are configured, if not, create default configuration string toolConfigName = "toolsconfig"; - if (!ToolConfig.IsConfigured(toolConfigName)) + try + { + if (!ToolConfig.IsConfigured(toolConfigName)) + { + // Create and save default configuration with mouse and screen capture disabled + var defaultConfig = new ToolConfig(); + defaultConfig.SaveConfig(toolConfigName); + } + + // Load current configuration and update UI + var toolConfig = ToolConfig.LoadConfig(toolConfigName); + multiAgentModeToolStripMenuItem.Checked = toolConfig.EnableMultiAgentMode; + } + catch (Exception ex) { - // Create and save default configuration with mouse and screen capture disabled - var defaultConfig = new ToolConfig(); - defaultConfig.SaveConfig(toolConfigName); + MessageBox.Show($"Error configuring tools: {ex.Message}", "Configuration Error"); } - + + // Add this call after initializing UI components + ApplyTheme(); + // Create messages panel as a FlowLayoutPanel with auto-scroll messagesPanel = new FlowLayoutPanel { @@ -104,6 +172,20 @@ private void Form1_Load(object sender, EventArgs e) sendButton.Click += SendButton_Click; inputPanel.Controls.Add(sendButton); + // Create microphone button + microphoneButton = new Button + { + Dock = DockStyle.Right, + Text = "🎤", + Width = 40, + Font = new Font("Segoe UI", 12F) + }; + microphoneButton.Click += MicrophoneButton_Click; + inputPanel.Controls.Add(microphoneButton); + + // Initialize speech recognition service + InitializeSpeechRecognition(); + // Handle window resize to adjust message widths this.Resize += (s, args) => { @@ -131,15 +213,165 @@ private void Form1_Load(object sender, EventArgs e) AddMessage("AI Assistant", "Welcome! How can I help you today?", true); } + private void InitializeSpeechRecognition() + { + try + { + var toolConfig = ToolConfig.LoadConfig("toolsconfig"); + + // Only initialize if speech recognition is enabled in config + if (toolConfig.EnableSpeechRecognition) + { + speechRecognition = new SpeechRecognitionService(); + speechRecognition.SpeechRecognized += (sender, result) => + { + // Use Invoke to update UI from a different thread + if (this.InvokeRequired) + { + this.Invoke(new Action((text) => + { + userInputTextBox.Text = text; + }), result); + } + else + { + userInputTextBox.Text = result; + } + }; + + // Add handler for voice commands + if (toolConfig.EnableVoiceCommands) + { + speechRecognition.CommandRecognized += (sender, command) => + { + if (this.InvokeRequired) + { + this.Invoke(new Action(() => + { + if (!string.IsNullOrWhiteSpace(userInputTextBox.Text)) + { + AddMessage("System", $"Voice command recognized: \"{command}\"", true); + SendButton_Click(this, EventArgs.Empty); + } + })); + } + else + { + if (!string.IsNullOrWhiteSpace(userInputTextBox.Text)) + { + AddMessage("System", $"Voice command recognized: \"{command}\"", true); + SendButton_Click(this, EventArgs.Empty); + } + } + }; + + // Start continuous listening if voice commands are enabled + if (toolConfig.EnableVoiceCommands) + { + //speechRecognition.StartListening(); + } + } + } + else + { + microphoneButton.Enabled = false; + } + } + catch (Exception ex) + { + MessageBox.Show($"Error initializing speech recognition: {ex.Message}", "Speech Recognition Error"); + microphoneButton.Enabled = false; + } + } + + private void MicrophoneButton_Click(object sender, EventArgs e) + { + if (speechRecognition == null) + { + MessageBox.Show("Speech recognition is not available.", "Feature Not Available"); + return; + } + + if (isListening) + { + StopListening(); + } + else + { + StartListening(); + } + } + + /// + /// Starts listening for voice input and updates UI accordingly. + /// + private void StartListening() + { + try + { + var toolConfig = ToolConfig.LoadConfig("toolsconfig"); + + // Clear existing text before starting to listen + userInputTextBox.Text = ""; + + // Change button appearance to indicate recording + microphoneButton.Text = "⏹️"; + microphoneButton.BackColor = Color.Red; + isListening = true; + + // Start listening + speechRecognition.StartListening(); + + // Add temporary message to indicate we're listening + AddMessage("System", "Listening... Speak now." + + (toolConfig.EnableVoiceCommands ? + $" Say \"{toolConfig.VoiceCommandPhrase}\" to send your message." : ""), true); + } + catch (Exception ex) + { + MessageBox.Show($"Error starting voice recognition: {ex.Message}", "Voice Recognition Error"); + StopListening(); + } + } + + private void StopListening() + { + try + { + // Change button appearance back to normal + microphoneButton.Text = "🎤"; + microphoneButton.BackColor = SystemColors.Control; + isListening = false; + + // Stop listening + speechRecognition.StopListening(); + + // Remove the listening message + RemoveMessagesByAuthor("System"); + + // If we have recognized text, send it automatically + if (!string.IsNullOrWhiteSpace(userInputTextBox.Text)) + { + SendButton_Click(this, EventArgs.Empty); + } + } + catch (Exception ex) + { + MessageBox.Show($"Error stopping voice recognition: {ex.Message}", "Voice Recognition Error"); + } + } + private void allowUserInput(bool enable) { - userInputTextBox.Enabled = enable; - sendButton.Enabled = enable; + userInputTextBox.Enabled = enable; + sendButton.Enabled = enable; + microphoneButton.Enabled = enable && speechRecognition != null; } private async void SendButton_Click(object sender, EventArgs e) { allowUserInput(false); + var toolConfig = ToolConfig.LoadConfig("toolsconfig"); // Check if the user input is empty string userInput = userInputTextBox.Text; @@ -152,14 +384,12 @@ private async void SendButton_Click(object sender, EventArgs e) // Add user message to UI AddMessage("You", userInput, false); - try { string aiResponse = await GetAIResponseAsync(userInput); AddMessage("AI", aiResponse, true); - + // Check if we should retain chat history - var toolConfig = ToolConfig.LoadConfig("toolsconfig"); if (!toolConfig.RetainChatHistory) { // Keep only the latest exchange in chat history @@ -172,11 +402,13 @@ private async void SendButton_Click(object sender, EventArgs e) catch (Exception ex) { MessageBox.Show($"Error communicating with AI: {ex.Message}", "Error"); + } + finally + { + userInputTextBox.Clear(); + RemoveMessagesByAuthor("System"); allowUserInput(true); } - - userInputTextBox.Clear(); - allowUserInput(true); } private async Task GetAIResponseAsync(string userInput) @@ -184,39 +416,98 @@ private async Task GetAIResponseAsync(string userInput) // Get the current config to determine which model to use var actionerConfig = APIConfig.LoadConfig("actioner"); var githubConfig = APIConfig.LoadConfig("github"); - var toolConfig = ToolConfig.LoadConfig("toolsconfig"); // Added to ensure we have the latest config // Create a StringBuilder to collect plugin output StringBuilder pluginOutput = new StringBuilder(); - + // Define a plugin output handler that adds messages to the plugin output - PluginOutputHandler outputHandler = (message) => { + PluginOutputHandler outputHandler = (message) => + { pluginOutput.AppendLine(message); }; - // Use Actioner model - passing down the toolConfig to ensure chat history setting is properly applied - Actioner actioner = new Actioner(outputHandler); - if(toolConfig.RetainChatHistory) + // Show a pre-execution notification to the user in the UI + AddMessage("System", "Your request is being processed. Please wait...", true); + + string aiResponse = string.Empty; + + // Use TaskNotifier to wrap the action execution with notifications + try { - actioner.SetChatHistory(chatHistory); + await TaskNotifier.RunWithNotificationAsync( + "Request Processing", + "Processing your request and preparing tools", + async () => + { + var toolConfig = ToolConfig.LoadConfig("toolsconfig"); + + // Use Actioner model with the notification infrastructure + Actioner actioner = new Actioner(outputHandler); + + // Set multi-agent mode based on tool configuration + actioner.SetMultiAgentMode(toolConfig.EnableMultiAgentMode); + + if (toolConfig.RetainChatHistory) + { + actioner.SetChatHistory(chatHistory); + } + + // Execute the action and get the AI response + aiResponse = await actioner.ExecuteAction(userInput); + }); + + // If there's plugin output, prepend it to the AI response + if (pluginOutput.Length > 0) + { + aiResponse = $"{pluginOutput}\n\n{aiResponse}"; + } + + // Remove the "processing" message from the UI + RemoveMessageIfSystem(); + return aiResponse; } - - // Execute the action and get the AI response - string aiResponse = await actioner.ExecuteAction(userInput); - - // If there's plugin output, prepend it to the AI response - if (pluginOutput.Length > 0) + catch (Exception ex) { - return $"{pluginOutput}\n\n{aiResponse}"; + // Remove the "processing" message from the UI + RemoveMessageIfSystem(); + + return $"Error processing request: {ex.Message}"; } - - return aiResponse; } - private void AddMessage(string author, string message, bool isInbound) + private void RemoveMessageIfSystem() // Add missing RemoveMessageIfSystem method + { + // Find and remove any "System" messages that indicate processing + for (int i = messagesPanel.Controls.Count - 1; i >= 0; i--) + { + Control c = messagesPanel.Controls[i]; + if (c is Panel bubble) + { + Label authorLabel = bubble.Controls.OfType().FirstOrDefault(); + if (authorLabel != null && + authorLabel.Text.Equals("System", StringComparison.OrdinalIgnoreCase) && + bubble.Controls.OfType().Any(tb => tb.Text.Contains("processing") || tb.Text.Contains("Please wait"))) + { + messagesPanel.Controls.RemoveAt(i); + bubble.Dispose(); + } + } + } + + messagesPanel.PerformLayout(); + } + + public void AddMessage(string author, string message, bool isInbound) { + // If called from a non-UI thread, use Invoke to switch to UI thread + if (this.InvokeRequired) + { + this.Invoke(new Action(AddMessage), new object[] { author, message, isInbound }); + return; + } + // Create a new chat message and add to history - var chatMessage = new ChatMessage + var chatMessage = new LocalChatMessage { Author = author, Content = message, @@ -273,14 +564,15 @@ private void AddMessage(string author, string message, bool isInbound) // Apply markdown formatting MarkdownHelper.ApplyMarkdownFormatting(messageRichTextBox, message); - + // Adjust height to fit content messageRichTextBox.Height = messageRichTextBox.GetPositionFromCharIndex(messageRichTextBox.TextLength).Y + 20; - + // Add copy context menu ContextMenuStrip contextMenu = new ContextMenuStrip(); ToolStripMenuItem copyMenuItem = new ToolStripMenuItem("Copy"); - copyMenuItem.Click += (sender, args) => { + copyMenuItem.Click += (sender, args) => + { if (messageRichTextBox.SelectedText.Length > 0) Clipboard.SetText(messageRichTextBox.SelectedText); else @@ -288,7 +580,7 @@ private void AddMessage(string author, string message, bool isInbound) }; contextMenu.Items.Add(copyMenuItem); messageRichTextBox.ContextMenuStrip = contextMenu; - + bubblePanel.Controls.Add(messageRichTextBox); currentY += messageRichTextBox.Height + 2; } @@ -308,20 +600,21 @@ private void AddMessage(string author, string message, bool isInbound) ScrollBars = ScrollBars.None, WordWrap = true }; - + // Auto-size the TextBox to fit content int textHeight = TextRenderer.MeasureText( - messageTextBox.Text, - messageTextBox.Font, - new Size(bubbleWidth - 20, int.MaxValue), + messageTextBox.Text, + messageTextBox.Font, + new Size(bubbleWidth - 20, int.MaxValue), TextFormatFlags.WordBreak ).Height; messageTextBox.Height = textHeight + 10; - + // Add copy context menu ContextMenuStrip contextMenu = new ContextMenuStrip(); ToolStripMenuItem copyMenuItem = new ToolStripMenuItem("Copy"); - copyMenuItem.Click += (sender, args) => { + copyMenuItem.Click += (sender, args) => + { if (messageTextBox.SelectedText.Length > 0) Clipboard.SetText(messageTextBox.SelectedText); else @@ -329,7 +622,7 @@ private void AddMessage(string author, string message, bool isInbound) }; contextMenu.Items.Add(copyMenuItem); messageTextBox.ContextMenuStrip = contextMenu; - + bubblePanel.Controls.Add(messageTextBox); currentY += messageTextBox.Height + 2; } @@ -371,36 +664,47 @@ private void AddMessage(string author, string message, bool isInbound) messagesPanel.PerformLayout(); } - private void githubToolStripMenuItem_Click(object sender, EventArgs e) + private void omniParserToolStripMenuItem_Click(object sender, EventArgs e) { - // Check if the config form is already open - if (Application.OpenForms.OfType().Count() == 1) - { - // If it is, bring it to the front - Application.OpenForms.OfType().First().BringToFront(); - } - else - { - // If it isn't, create a new instance of the form - ConfigForm configForm = new ConfigForm("github"); - configForm.Show(); - } + // OmniParser removed - show message + MessageBox.Show("OmniParser has been removed. Use Playwright for web automation instead.", + "Feature Removed", MessageBoxButtons.OK, MessageBoxIcon.Information); } - private void omniParserToolStripMenuItem_Click(object sender, EventArgs e) + private void RemoveMessagesByAuthor(string author) { - // Check if the config form is already open - if (Application.OpenForms.OfType().Count() == 1) + if (string.IsNullOrWhiteSpace(author)) return; + + /* 1. Strip them out of the in-memory conversation -------- */ + chatHistory.RemoveAll(m => + m.Author.Equals(author, StringComparison.OrdinalIgnoreCase)); + + /* 2. Find every message bubble whose first child Label * + * contains the requested author name, queue for removal */ + var doomed = new List(); + + foreach (Control c in messagesPanel.Controls) { - // If it is, bring it to the front - Application.OpenForms.OfType().First().BringToFront(); + if (c is Panel bubble) + { + // the author label is the first control we added + var lbl = bubble.Controls.OfType().FirstOrDefault(); + if (lbl != null && + lbl.Text.Equals(author, StringComparison.OrdinalIgnoreCase)) + { + doomed.Add(bubble); + } + } } - else + + /* 3. Remove from UI and dispose to free resources --------- */ + foreach (var bubble in doomed) { - // If it isn't, create a new instance of the form - OmniParserForm omniParserForm = new OmniParserForm(); - omniParserForm.Show(); + messagesPanel.Controls.Remove(bubble); + bubble.Dispose(); } + + messagesPanel.PerformLayout(); } private void toolsToolStripMenuItem_Click(object sender, EventArgs e) @@ -415,7 +719,7 @@ private void toolsToolStripMenuItem_Click(object sender, EventArgs e) { // Check if tool configuration exists bool isConfigured = ToolConfig.IsConfigured("toolsconfig"); - + // If it isn't, create a new instance of the form // Pass true to open as new configuration if not configured ToolConfigForm toolConfigForm = new ToolConfigForm(!isConfigured); @@ -432,11 +736,158 @@ private void newChatToolStripMenuItem_Click(object sender, EventArgs e) userInputTextBox.Clear(); userInputTextBox.Enabled = true; sendButton.Enabled = true; + } + // Ensure proper disposal of SpeechRecognitionService + protected override void OnFormClosing(FormClosingEventArgs e) + { + base.OnFormClosing(e); + + // Dispose of speech recognition service if initialized + speechRecognition?.Dispose(); + } + + // Export menu event handlers + private void exportToJSONToolStripMenuItem_Click(object sender, EventArgs e) + { + ChatExporter.ExportToJson(chatHistory); + } + + private void exportToMarkdownToolStripMenuItem_Click(object sender, EventArgs e) + { + ChatExporter.ExportToMarkdown(chatHistory); + } + + private void exportDebugLogToolStripMenuItem_Click(object sender, EventArgs e) + { + ChatExporter.ExportWithToolCalls(chatHistory); + } + + private void copyToClipboardToolStripMenuItem_Click(object sender, EventArgs e) + { + ChatExporter.CopyToClipboard(chatHistory); + } + + // AI Agent configuration handlers + private void actionerAgentToolStripMenuItem_Click(object sender, EventArgs e) + { + if (Application.OpenForms.OfType().Count() == 1) + { + Application.OpenForms.OfType().First().BringToFront(); + } + else + { + AIProviderConfigForm configForm = new AIProviderConfigForm("actioner"); + configForm.ShowDialog(); + } + } + + private void plannerAgentToolStripMenuItem_Click(object sender, EventArgs e) + { + if (Application.OpenForms.OfType().Count() == 1) + { + Application.OpenForms.OfType().First().BringToFront(); + } + else + { + AIProviderConfigForm configForm = new AIProviderConfigForm("planner"); + configForm.ShowDialog(); + } + } + + private void coordinatorAgentToolStripMenuItem_Click(object sender, EventArgs e) + { + if (Application.OpenForms.OfType().Count() == 1) + { + Application.OpenForms.OfType().First().BringToFront(); + } + else + { + AIProviderConfigForm configForm = new AIProviderConfigForm("coordinator"); + configForm.ShowDialog(); + } + } + + private void githubAgentToolStripMenuItem_Click(object sender, EventArgs e) + { + if (Application.OpenForms.OfType().Count() == 1) + { + Application.OpenForms.OfType().First().BringToFront(); + } + else + { + AIProviderConfigForm configForm = new AIProviderConfigForm("github"); + configForm.ShowDialog(); + } + } + + // Multi-agent mode toggle + private void multiAgentModeToolStripMenuItem_Click(object sender, EventArgs e) + { + var toolConfig = ToolConfig.LoadConfig("toolsconfig"); + toolConfig.EnableMultiAgentMode = multiAgentModeToolStripMenuItem.Checked; + toolConfig.SaveConfig("toolsconfig"); + + string status = multiAgentModeToolStripMenuItem.Checked ? "enabled" : "disabled"; + AddMessage("System", $"Multi-Agent Mode {status}. " + + (multiAgentModeToolStripMenuItem.Checked + ? "Using Coordinator → Planner → Actioner workflow with up to 25 steps." + : "Using direct Actioner execution."), true); + } + + // View menu handlers + private void activityMonitorToolStripMenuItem_Click(object sender, EventArgs e) + { + // TODO: Toggle activity monitor panel + MessageBox.Show("Activity Monitor feature coming soon!", "Feature Preview", + MessageBoxButtons.OK, MessageBoxIcon.Information); + } + + private void executionVisualizerToolStripMenuItem_Click(object sender, EventArgs e) + { + // TODO: Toggle execution visualizer panel + MessageBox.Show("Execution Visualizer feature coming soon!", "Feature Preview", + MessageBoxButtons.OK, MessageBoxIcon.Information); + } + + // Help menu handlers + private void aboutToolStripMenuItem_Click(object sender, EventArgs e) + { + MessageBox.Show( + "Recursive Control\n\n" + + "AI Computer Control for Windows\n\n" + + "Version: 2.0 (October 2025)\n\n" + + "Features:\n" + + "• Multi-Agent Workflow (Coordinator → Planner → Actioner)\n" + + "• ONNX-powered Screenshot Analysis\n" + + "• Window-Targeted Keyboard/Mouse Control\n" + + "• Browser Automation with Playwright\n" + + "• PowerShell & CMD Execution\n" + + "• Voice Commands\n\n" + + "Visit: github.com/flowdevs-io/Recursive-Control", + "About Recursive Control", + MessageBoxButtons.OK, + MessageBoxIcon.Information); + } + + private void documentationToolStripMenuItem_Click(object sender, EventArgs e) + { + try + { + System.Diagnostics.Process.Start("https://flowdevs-io.github.io/Recursive-Control"); + } + catch + { + MessageBox.Show("Could not open documentation.\n\n" + + "Visit: https://flowdevs-io.github.io/Recursive-Control", + "Documentation", + MessageBoxButtons.OK, + MessageBoxIcon.Information); + } } } - public class ChatMessage + public class LocalChatMessage { public string Author { get; set; } public string Content { get; set; } diff --git a/FlowVision/LMStudioConfigForm.cs b/FlowVision/LMStudioConfigForm.cs new file mode 100644 index 0000000..24d1e84 --- /dev/null +++ b/FlowVision/LMStudioConfigForm.cs @@ -0,0 +1,310 @@ +using System; +using System.Drawing; +using System.Windows.Forms; +using FlowVision.lib.Classes; + +namespace FlowVision +{ + public partial class LMStudioConfigForm : Form + { + private LMStudioConfig config; + + private CheckBox chkEnabled; + private TextBox txtEndpoint; + private TextBox txtModelName; + private TextBox txtApiKey; + private NumericUpDown numTemperature; + private NumericUpDown numMaxTokens; + private NumericUpDown numTimeout; + private Button btnSave; + private Button btnCancel; + private Button btnTestConnection; + private Label lblStatus; + + public LMStudioConfigForm() + { + InitializeComponent(); + config = LMStudioConfig.LoadConfig(); + LoadConfigToUI(); + } + + private void InitializeComponent() + { + this.Text = "LM Studio Configuration"; + this.Size = new Size(600, 500); + this.FormBorderStyle = FormBorderStyle.FixedDialog; + this.MaximizeBox = false; + this.MinimizeBox = false; + this.StartPosition = FormStartPosition.CenterParent; + + int y = 20; + int labelWidth = 150; + int controlWidth = 380; + int leftMargin = 20; + int controlX = leftMargin + labelWidth + 10; + + // Title + var titleLabel = new Label + { + Text = "LM Studio Local AI Configuration", + Font = new Font(this.Font.FontFamily, 12, FontStyle.Bold), + Location = new Point(leftMargin, y), + AutoSize = true + }; + this.Controls.Add(titleLabel); + y += 40; + + // Info label + var infoLabel = new Label + { + Text = "Configure LM Studio to use local AI models instead of Azure OpenAI.\n" + + "Make sure LM Studio is running with a model loaded and the server started.", + Location = new Point(leftMargin, y), + Size = new Size(540, 40), + ForeColor = Color.DarkGray + }; + this.Controls.Add(infoLabel); + y += 50; + + // Enabled checkbox + chkEnabled = new CheckBox + { + Text = "Enable LM Studio (Use local AI instead of Azure)", + Location = new Point(leftMargin, y), + Width = 400, + Checked = config.Enabled + }; + this.Controls.Add(chkEnabled); + y += 35; + + // Endpoint + AddLabel("Endpoint URL:", leftMargin, y); + txtEndpoint = new TextBox + { + Location = new Point(controlX, y), + Width = controlWidth, + Text = config.EndpointURL + }; + this.Controls.Add(txtEndpoint); + y += 30; + + // Model Name + AddLabel("Model Name:", leftMargin, y); + txtModelName = new TextBox + { + Location = new Point(controlX, y), + Width = controlWidth, + Text = config.ModelName + }; + this.Controls.Add(txtModelName); + y += 30; + + // API Key (placeholder) + AddLabel("API Key (optional):", leftMargin, y); + txtApiKey = new TextBox + { + Location = new Point(controlX, y), + Width = controlWidth, + Text = config.APIKey + }; + this.Controls.Add(txtApiKey); + y += 30; + + // Temperature + AddLabel("Temperature:", leftMargin, y); + numTemperature = new NumericUpDown + { + Location = new Point(controlX, y), + Width = 100, + Minimum = 0, + Maximum = 2, + DecimalPlaces = 2, + Increment = 0.1M, + Value = (decimal)config.Temperature + }; + this.Controls.Add(numTemperature); + y += 30; + + // Max Tokens + AddLabel("Max Tokens:", leftMargin, y); + numMaxTokens = new NumericUpDown + { + Location = new Point(controlX, y), + Width = 100, + Minimum = 128, + Maximum = 32768, + Increment = 128, + Value = config.MaxTokens + }; + this.Controls.Add(numMaxTokens); + y += 30; + + // Timeout + AddLabel("Timeout (seconds):", leftMargin, y); + numTimeout = new NumericUpDown + { + Location = new Point(controlX, y), + Width = 100, + Minimum = 30, + Maximum = 600, + Increment = 30, + Value = config.TimeoutSeconds + }; + this.Controls.Add(numTimeout); + y += 40; + + // Status label + lblStatus = new Label + { + Location = new Point(leftMargin, y), + Size = new Size(540, 20), + ForeColor = Color.Blue + }; + this.Controls.Add(lblStatus); + y += 30; + + // Buttons + btnTestConnection = new Button + { + Text = "Test Connection", + Location = new Point(leftMargin, y), + Width = 120, + Height = 30 + }; + btnTestConnection.Click += BtnTestConnection_Click; + this.Controls.Add(btnTestConnection); + + btnSave = new Button + { + Text = "Save", + Location = new Point(controlX + controlWidth - 160, y), + Width = 75, + Height = 30 + }; + btnSave.Click += BtnSave_Click; + this.Controls.Add(btnSave); + + btnCancel = new Button + { + Text = "Cancel", + Location = new Point(controlX + controlWidth - 75, y), + Width = 75, + Height = 30, + DialogResult = DialogResult.Cancel + }; + btnCancel.Click += (s, e) => this.Close(); + this.Controls.Add(btnCancel); + + this.AcceptButton = btnSave; + this.CancelButton = btnCancel; + } + + private void AddLabel(string text, int x, int y) + { + var label = new Label + { + Text = text, + Location = new Point(x, y + 3), + Width = 150 + }; + this.Controls.Add(label); + } + + private void LoadConfigToUI() + { + chkEnabled.Checked = config.Enabled; + txtEndpoint.Text = config.EndpointURL; + txtModelName.Text = config.ModelName; + txtApiKey.Text = config.APIKey; + numTemperature.Value = (decimal)config.Temperature; + numMaxTokens.Value = config.MaxTokens; + numTimeout.Value = config.TimeoutSeconds; + } + + private void SaveUIToConfig() + { + config.Enabled = chkEnabled.Checked; + config.EndpointURL = txtEndpoint.Text; + config.ModelName = txtModelName.Text; + config.APIKey = txtApiKey.Text; + config.Temperature = (double)numTemperature.Value; + config.MaxTokens = (int)numMaxTokens.Value; + config.TimeoutSeconds = (int)numTimeout.Value; + } + + private async void BtnTestConnection_Click(object sender, EventArgs e) + { + lblStatus.Text = "Testing connection..."; + lblStatus.ForeColor = Color.Blue; + btnTestConnection.Enabled = false; + + try + { + // Save current UI values temporarily + SaveUIToConfig(); + + // Try to create a client and make a simple request + var client = new OpenAI.OpenAIClient(new System.ClientModel.ApiKeyCredential(config.APIKey), new OpenAI.OpenAIClientOptions + { + Endpoint = new Uri(config.EndpointURL) + }); + + var chatClient = client.GetChatClient(config.ModelName); + var ichatClient = (Microsoft.Extensions.AI.IChatClient)chatClient; + + // Simple test message + var messages = new System.Collections.Generic.List + { + new Microsoft.Extensions.AI.ChatMessage( + Microsoft.Extensions.AI.ChatRole.User, + "Say 'hello' in one word") + }; + + var response = await ichatClient.GetResponseAsync(messages); + + lblStatus.Text = "✓ Connection successful! LM Studio is responding."; + lblStatus.ForeColor = Color.Green; + } + catch (Exception ex) + { + lblStatus.Text = $"✗ Connection failed: {ex.Message}"; + lblStatus.ForeColor = Color.Red; + + if (ex.Message.Contains("Connection refused") || ex.Message.Contains("No connection")) + { + MessageBox.Show( + "Cannot connect to LM Studio. Please make sure:\n\n" + + "1. LM Studio is running\n" + + "2. A model is loaded\n" + + "3. The server is started (click 'Start Server' in LM Studio)\n" + + $"4. The endpoint is correct: {config.EndpointURL}", + "Connection Failed", + MessageBoxButtons.OK, + MessageBoxIcon.Warning); + } + } + finally + { + btnTestConnection.Enabled = true; + } + } + + private void BtnSave_Click(object sender, EventArgs e) + { + SaveUIToConfig(); + config.SaveConfig(); + + MessageBox.Show( + "LM Studio configuration saved successfully!\n\n" + + (config.Enabled + ? "Local AI is now ENABLED. The application will use LM Studio for AI requests." + : "Local AI is DISABLED. The application will use Azure OpenAI."), + "Configuration Saved", + MessageBoxButtons.OK, + MessageBoxIcon.Information); + + this.DialogResult = DialogResult.OK; + this.Close(); + } + } +} diff --git a/FlowVision/OmniParserForm.Designer.cs b/FlowVision/OmniParserForm.Designer.cs index 77100f2..1110bea 100644 --- a/FlowVision/OmniParserForm.Designer.cs +++ b/FlowVision/OmniParserForm.Designer.cs @@ -28,49 +28,57 @@ protected override void Dispose(bool disposing) /// private void InitializeComponent() { - this.omniParserServerURL = new System.Windows.Forms.TextBox(); this.label1 = new System.Windows.Forms.Label(); - this.saveButton = new System.Windows.Forms.Button(); + this.statusLabel = new System.Windows.Forms.Label(); + this.infoLabel = new System.Windows.Forms.Label(); this.SuspendLayout(); // - // omniParserServerURL - // - this.omniParserServerURL.Font = new System.Drawing.Font("Comic Sans MS", 12F); - this.omniParserServerURL.Location = new System.Drawing.Point(139, 7); - this.omniParserServerURL.Name = "omniParserServerURL"; - this.omniParserServerURL.Size = new System.Drawing.Size(355, 30); - this.omniParserServerURL.TabIndex = 0; - // // label1 // this.label1.AutoSize = true; - this.label1.Font = new System.Drawing.Font("Comic Sans MS", 11.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); - this.label1.Location = new System.Drawing.Point(12, 12); + this.label1.Font = new System.Drawing.Font("Segoe UI", 12F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.label1.Location = new System.Drawing.Point(20, 20); this.label1.Name = "label1"; - this.label1.Size = new System.Drawing.Size(121, 20); + this.label1.Size = new System.Drawing.Size(154, 21); this.label1.TabIndex = 1; - this.label1.Text = "OmniParser URL"; + this.label1.Text = "OmniParser Status:"; + // + // statusLabel + // + this.statusLabel.AutoSize = true; + this.statusLabel.Font = new System.Drawing.Font("Segoe UI", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.statusLabel.ForeColor = System.Drawing.Color.Green; + this.statusLabel.Location = new System.Drawing.Point(180, 20); + this.statusLabel.Name = "statusLabel"; + this.statusLabel.Size = new System.Drawing.Size(196, 21); + this.statusLabel.TabIndex = 3; + this.statusLabel.Text = "✓ Embedded Mode Active"; // - // saveButton + // infoLabel // - this.saveButton.Location = new System.Drawing.Point(162, 41); - this.saveButton.Name = "saveButton"; - this.saveButton.Size = new System.Drawing.Size(140, 23); - this.saveButton.TabIndex = 2; - this.saveButton.Text = "Save"; - this.saveButton.UseVisualStyleBackColor = true; - this.saveButton.Click += new System.EventHandler(this.saveButton_Click); + this.infoLabel.AutoSize = true; + this.infoLabel.Font = new System.Drawing.Font("Segoe UI", 9.75F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.infoLabel.ForeColor = System.Drawing.Color.DimGray; + this.infoLabel.Location = new System.Drawing.Point(21, 55); + this.infoLabel.Name = "infoLabel"; + this.infoLabel.Size = new System.Drawing.Size(380, 34); + this.infoLabel.TabIndex = 4; + this.infoLabel.Text = "FlowVision is using the internal ONNX detection model.\r\nNo external server or configuration is required."; // // OmniParserForm // this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font; - this.ClientSize = new System.Drawing.Size(506, 76); - this.Controls.Add(this.saveButton); + this.ClientSize = new System.Drawing.Size(440, 110); + this.Controls.Add(this.infoLabel); + this.Controls.Add(this.statusLabel); this.Controls.Add(this.label1); - this.Controls.Add(this.omniParserServerURL); + this.MaximizeBox = false; + this.MinimizeBox = false; this.Name = "OmniParserForm"; - this.Text = "OmniParserForm"; + this.ShowIcon = false; + this.StartPosition = System.Windows.Forms.FormStartPosition.CenterScreen; + this.Text = "OmniParser Configuration"; this.Load += new System.EventHandler(this.OmniParserForm_Load); this.ResumeLayout(false); this.PerformLayout(); @@ -79,8 +87,8 @@ private void InitializeComponent() #endregion - private System.Windows.Forms.TextBox omniParserServerURL; private System.Windows.Forms.Label label1; - private System.Windows.Forms.Button saveButton; + private System.Windows.Forms.Label statusLabel; + private System.Windows.Forms.Label infoLabel; } } \ No newline at end of file diff --git a/FlowVision/OmniParserForm.cs b/FlowVision/OmniParserForm.cs index c9aeec0..aaa2a08 100644 --- a/FlowVision/OmniParserForm.cs +++ b/FlowVision/OmniParserForm.cs @@ -1,11 +1,4 @@ using System; -using System.Collections.Generic; -using System.ComponentModel; -using System.Data; -using System.Drawing; -using System.Linq; -using System.Text; -using System.Threading.Tasks; using System.Windows.Forms; using FlowVision.lib.Classes; @@ -13,35 +6,14 @@ namespace FlowVision { public partial class OmniParserForm : Form { - private OmniParserConfig _config; - public OmniParserForm() { InitializeComponent(); - _config = OmniParserConfig.LoadConfig(); - } - - private void saveButton_Click(object sender, EventArgs e) - { - string url = omniParserServerURL.Text.Trim(); - if (string.IsNullOrEmpty(url)) - { - MessageBox.Show("Please enter a valid URL."); - return; - } - - // Save the URL to the config file - _config.ServerURL = url; - _config.SaveConfig(); - - // Optionally, you can close the form after saving - this.Close(); } private void OmniParserForm_Load(object sender, EventArgs e) { - // Load the URL from the config file - omniParserServerURL.Text = _config.ServerURL; + // Form is now just an informational status display } } } diff --git a/FlowVision/Models/ChatMessageModel.cs b/FlowVision/README.md similarity index 100% rename from FlowVision/Models/ChatMessageModel.cs rename to FlowVision/README.md diff --git a/FlowVision/ToolConfigForm.Designer.cs b/FlowVision/ToolConfigForm.Designer.cs index 27ddf67..e8929ca 100644 --- a/FlowVision/ToolConfigForm.Designer.cs +++ b/FlowVision/ToolConfigForm.Designer.cs @@ -28,154 +28,440 @@ protected override void Dispose(bool disposing) /// private void InitializeComponent() { + this.components = new System.ComponentModel.Container(); this.groupBoxPlugins = new System.Windows.Forms.GroupBox(); + this.indicatorWindow = new System.Windows.Forms.PictureBox(); + this.indicatorMouse = new System.Windows.Forms.PictureBox(); + this.indicatorKeyboard = new System.Windows.Forms.PictureBox(); + this.indicatorScreenCapture = new System.Windows.Forms.PictureBox(); + this.indicatorPowerShell = new System.Windows.Forms.PictureBox(); + this.indicatorCMD = new System.Windows.Forms.PictureBox(); + this.indicatorPlaywright = new System.Windows.Forms.PictureBox(); this.chkMousePlugin = new System.Windows.Forms.CheckBox(); this.chkKeyboardPlugin = new System.Windows.Forms.CheckBox(); this.chkScreenCapturePlugin = new System.Windows.Forms.CheckBox(); this.chkPowerShellPlugin = new System.Windows.Forms.CheckBox(); this.chkCMDPlugin = new System.Windows.Forms.CheckBox(); this.chkWindowSelectionPlugin = new System.Windows.Forms.CheckBox(); + this.chkPlaywrightPlugin = new System.Windows.Forms.CheckBox(); this.enablePluginLoggingCheckBox = new System.Windows.Forms.CheckBox(); + this.chkEnableRemoteControl = new System.Windows.Forms.CheckBox(); + this.numRemotePort = new System.Windows.Forms.NumericUpDown(); + this.lblRemotePort = new System.Windows.Forms.Label(); this.groupBoxSettings = new System.Windows.Forms.GroupBox(); + this.indicatorAutoInvoke = new System.Windows.Forms.PictureBox(); + this.indicatorMultiAgent = new System.Windows.Forms.PictureBox(); + this.chkMultiAgentMode = new System.Windows.Forms.CheckBox(); this.chkAutoInvoke = new System.Windows.Forms.CheckBox(); this.chkRetainChatHistory = new System.Windows.Forms.CheckBox(); this.numTemperature = new System.Windows.Forms.NumericUpDown(); this.lblTemperature = new System.Windows.Forms.Label(); + this.chkDynamicToolPrompts = new System.Windows.Forms.CheckBox(); this.saveButton = new System.Windows.Forms.Button(); this.cancelButton = new System.Windows.Forms.Button(); - this.groupBoxSystemPrompt = new System.Windows.Forms.GroupBox(); - this.txtSystemPrompt = new System.Windows.Forms.TextBox(); + this.groupBoxSpeechRecognition = new System.Windows.Forms.GroupBox(); + this.indicatorSpeech = new System.Windows.Forms.PictureBox(); + this.comboSpeechLanguage = new System.Windows.Forms.ComboBox(); + this.lblSpeechLanguage = new System.Windows.Forms.Label(); + this.chkEnableSpeechRecognition = new System.Windows.Forms.CheckBox(); + this.groupBoxVoiceCommands = new System.Windows.Forms.GroupBox(); + this.indicatorVoiceCmd = new System.Windows.Forms.PictureBox(); + this.txtVoiceCommandPhrase = new System.Windows.Forms.TextBox(); + this.lblVoiceCommandPhrase = new System.Windows.Forms.Label(); + this.chkEnableVoiceCommands = new System.Windows.Forms.CheckBox(); + this.tabControlMain = new System.Windows.Forms.TabControl(); + this.tabPlugins = new System.Windows.Forms.TabPage(); + this.tabAISettings = new System.Windows.Forms.TabPage(); + this.tabVoice = new System.Windows.Forms.TabPage(); + this.tabCoordinator = new System.Windows.Forms.TabPage(); + this.grpCoordinatorConfig = new System.Windows.Forms.GroupBox(); + this.lblCoordinatorPrompt = new System.Windows.Forms.Label(); + this.txtCoordinatorSystemPrompt = new System.Windows.Forms.TextBox(); + this.chkUseCustomCoordinatorConfig = new System.Windows.Forms.CheckBox(); + this.comboCoordinatorConfig = new System.Windows.Forms.ComboBox(); + this.btnConfigureCoordinator = new System.Windows.Forms.Button(); + this.btnResetCoordinator = new System.Windows.Forms.Button(); + this.tabPlanner = new System.Windows.Forms.TabPage(); + this.grpPlannerConfig = new System.Windows.Forms.GroupBox(); + this.lblPlannerPrompt = new System.Windows.Forms.Label(); + this.txtPlannerSystemPrompt = new System.Windows.Forms.TextBox(); + this.chkUseCustomPlannerConfig = new System.Windows.Forms.CheckBox(); + this.comboPlannerConfig = new System.Windows.Forms.ComboBox(); + this.btnConfigurePlanner = new System.Windows.Forms.Button(); + this.btnResetPlanner = new System.Windows.Forms.Button(); + this.tabActioner = new System.Windows.Forms.TabPage(); + this.grpActionerConfig = new System.Windows.Forms.GroupBox(); + this.lblActionerPrompt = new System.Windows.Forms.Label(); + this.txtActionerSystemPrompt = new System.Windows.Forms.TextBox(); + this.chkUseCustomExecutorConfig = new System.Windows.Forms.CheckBox(); + this.comboActionerConfig = new System.Windows.Forms.ComboBox(); + this.btnConfigureActioner = new System.Windows.Forms.Button(); + this.btnResetActioner = new System.Windows.Forms.Button(); + this.tabAppearance = new System.Windows.Forms.TabPage(); + this.groupBoxTheme = new System.Windows.Forms.GroupBox(); + this.labelTheme = new System.Windows.Forms.Label(); + this.cbTheme = new System.Windows.Forms.ComboBox(); + this.tabProfiles = new System.Windows.Forms.TabPage(); + this.groupBoxProfiles = new System.Windows.Forms.GroupBox(); + this.btnDeleteProfile = new System.Windows.Forms.Button(); + this.btnLoadProfile = new System.Windows.Forms.Button(); + this.btnSaveProfile = new System.Windows.Forms.Button(); + this.labelProfile = new System.Windows.Forms.Label(); + this.cmbProfiles = new System.Windows.Forms.ComboBox(); + this.imageListStatus = new System.Windows.Forms.ImageList(this.components); + this.panelSearch = new System.Windows.Forms.Panel(); + this.searchResultLabel = new System.Windows.Forms.Label(); + this.txtSearchSettings = new System.Windows.Forms.TextBox(); + this.lblSearchSettings = new System.Windows.Forms.Label(); + this.saveNotification = new System.Windows.Forms.Panel(); + this.lblSaveNotification = new System.Windows.Forms.Label(); + this.saveNotificationTimer = new System.Windows.Forms.Timer(this.components); this.groupBoxPlugins.SuspendLayout(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorWindow)).BeginInit(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorMouse)).BeginInit(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorKeyboard)).BeginInit(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorScreenCapture)).BeginInit(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorPowerShell)).BeginInit(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorCMD)).BeginInit(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorPlaywright)).BeginInit(); + ((System.ComponentModel.ISupportInitialize)(this.numRemotePort)).BeginInit(); this.groupBoxSettings.SuspendLayout(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorAutoInvoke)).BeginInit(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorMultiAgent)).BeginInit(); ((System.ComponentModel.ISupportInitialize)(this.numTemperature)).BeginInit(); - this.groupBoxSystemPrompt.SuspendLayout(); + this.groupBoxSpeechRecognition.SuspendLayout(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorSpeech)).BeginInit(); + this.groupBoxVoiceCommands.SuspendLayout(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorVoiceCmd)).BeginInit(); + this.tabControlMain.SuspendLayout(); + this.tabPlugins.SuspendLayout(); + this.tabAISettings.SuspendLayout(); + this.tabVoice.SuspendLayout(); + this.tabCoordinator.SuspendLayout(); + this.grpCoordinatorConfig.SuspendLayout(); + this.tabPlanner.SuspendLayout(); + this.grpPlannerConfig.SuspendLayout(); + this.tabActioner.SuspendLayout(); + this.grpActionerConfig.SuspendLayout(); + this.tabAppearance.SuspendLayout(); + this.groupBoxTheme.SuspendLayout(); + this.tabProfiles.SuspendLayout(); + this.groupBoxProfiles.SuspendLayout(); + this.panelSearch.SuspendLayout(); + this.saveNotification.SuspendLayout(); this.SuspendLayout(); // // groupBoxPlugins // + this.groupBoxPlugins.Controls.Add(this.indicatorWindow); + this.groupBoxPlugins.Controls.Add(this.indicatorMouse); + this.groupBoxPlugins.Controls.Add(this.indicatorKeyboard); + this.groupBoxPlugins.Controls.Add(this.indicatorScreenCapture); + this.groupBoxPlugins.Controls.Add(this.indicatorPowerShell); + this.groupBoxPlugins.Controls.Add(this.indicatorCMD); + this.groupBoxPlugins.Controls.Add(this.indicatorPlaywright); this.groupBoxPlugins.Controls.Add(this.chkMousePlugin); this.groupBoxPlugins.Controls.Add(this.chkKeyboardPlugin); this.groupBoxPlugins.Controls.Add(this.chkScreenCapturePlugin); this.groupBoxPlugins.Controls.Add(this.chkPowerShellPlugin); this.groupBoxPlugins.Controls.Add(this.chkCMDPlugin); this.groupBoxPlugins.Controls.Add(this.chkWindowSelectionPlugin); - this.groupBoxPlugins.Font = new System.Drawing.Font("Comic Sans MS", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); - this.groupBoxPlugins.Location = new System.Drawing.Point(12, 12); + this.groupBoxPlugins.Controls.Add(this.chkPlaywrightPlugin); + this.groupBoxPlugins.Font = new System.Drawing.Font("Segoe UI", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.groupBoxPlugins.Location = new System.Drawing.Point(6, 6); this.groupBoxPlugins.Name = "groupBoxPlugins"; - this.groupBoxPlugins.Size = new System.Drawing.Size(436, 200); + this.groupBoxPlugins.Size = new System.Drawing.Size(436, 240); this.groupBoxPlugins.TabIndex = 0; this.groupBoxPlugins.TabStop = false; this.groupBoxPlugins.Text = "Available Plugins"; // + // indicatorWindow + // + this.indicatorWindow.BackColor = System.Drawing.Color.Transparent; + this.indicatorWindow.Font = new System.Drawing.Font("Segoe UI Symbol", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.indicatorWindow.Location = new System.Drawing.Point(390, 171); + this.indicatorWindow.Name = "indicatorWindow"; + this.indicatorWindow.Size = new System.Drawing.Size(24, 24); + this.indicatorWindow.SizeMode = System.Windows.Forms.PictureBoxSizeMode.CenterImage; + this.indicatorWindow.TabIndex = 11; + this.indicatorWindow.TabStop = false; + // + // indicatorMouse + // + this.indicatorMouse.BackColor = System.Drawing.Color.Transparent; + this.indicatorMouse.Font = new System.Drawing.Font("Segoe UI Symbol", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.indicatorMouse.Location = new System.Drawing.Point(390, 142); + this.indicatorMouse.Name = "indicatorMouse"; + this.indicatorMouse.Size = new System.Drawing.Size(24, 24); + this.indicatorMouse.SizeMode = System.Windows.Forms.PictureBoxSizeMode.CenterImage; + this.indicatorMouse.TabIndex = 10; + this.indicatorMouse.TabStop = false; + // + // indicatorKeyboard + // + this.indicatorKeyboard.BackColor = System.Drawing.Color.Transparent; + this.indicatorKeyboard.Font = new System.Drawing.Font("Segoe UI Symbol", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.indicatorKeyboard.Location = new System.Drawing.Point(390, 113); + this.indicatorKeyboard.Name = "indicatorKeyboard"; + this.indicatorKeyboard.Size = new System.Drawing.Size(24, 24); + this.indicatorKeyboard.SizeMode = System.Windows.Forms.PictureBoxSizeMode.CenterImage; + this.indicatorKeyboard.TabIndex = 9; + this.indicatorKeyboard.TabStop = false; + // + // indicatorScreenCapture + // + this.indicatorScreenCapture.BackColor = System.Drawing.Color.Transparent; + this.indicatorScreenCapture.Font = new System.Drawing.Font("Segoe UI Symbol", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.indicatorScreenCapture.Location = new System.Drawing.Point(390, 84); + this.indicatorScreenCapture.Name = "indicatorScreenCapture"; + this.indicatorScreenCapture.Size = new System.Drawing.Size(24, 24); + this.indicatorScreenCapture.SizeMode = System.Windows.Forms.PictureBoxSizeMode.CenterImage; + this.indicatorScreenCapture.TabIndex = 8; + this.indicatorScreenCapture.TabStop = false; + // + // indicatorPowerShell + // + this.indicatorPowerShell.BackColor = System.Drawing.Color.Transparent; + this.indicatorPowerShell.Font = new System.Drawing.Font("Segoe UI Symbol", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.indicatorPowerShell.Location = new System.Drawing.Point(390, 55); + this.indicatorPowerShell.Name = "indicatorPowerShell"; + this.indicatorPowerShell.Size = new System.Drawing.Size(24, 24); + this.indicatorPowerShell.SizeMode = System.Windows.Forms.PictureBoxSizeMode.CenterImage; + this.indicatorPowerShell.TabIndex = 7; + this.indicatorPowerShell.TabStop = false; + // + // indicatorCMD + // + this.indicatorCMD.BackColor = System.Drawing.Color.Transparent; + this.indicatorCMD.Font = new System.Drawing.Font("Segoe UI Symbol", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.indicatorCMD.Location = new System.Drawing.Point(390, 26); + this.indicatorCMD.Name = "indicatorCMD"; + this.indicatorCMD.Size = new System.Drawing.Size(24, 24); + this.indicatorCMD.SizeMode = System.Windows.Forms.PictureBoxSizeMode.CenterImage; + this.indicatorCMD.TabIndex = 6; + this.indicatorCMD.TabStop = false; + // + // indicatorPlaywright + // + this.indicatorPlaywright.BackColor = System.Drawing.Color.Transparent; + this.indicatorPlaywright.Font = new System.Drawing.Font("Segoe UI Symbol", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.indicatorPlaywright.Location = new System.Drawing.Point(390, 200); + this.indicatorPlaywright.Name = "indicatorPlaywright"; + this.indicatorPlaywright.Size = new System.Drawing.Size(24, 24); + this.indicatorPlaywright.SizeMode = System.Windows.Forms.PictureBoxSizeMode.CenterImage; + this.indicatorPlaywright.TabIndex = 12; + this.indicatorPlaywright.TabStop = false; + // // chkMousePlugin // this.chkMousePlugin.AutoSize = true; - this.chkMousePlugin.Font = new System.Drawing.Font("Comic Sans MS", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.chkMousePlugin.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); this.chkMousePlugin.Location = new System.Drawing.Point(17, 142); this.chkMousePlugin.Name = "chkMousePlugin"; - this.chkMousePlugin.Size = new System.Drawing.Size(288, 23); + this.chkMousePlugin.Size = new System.Drawing.Size(287, 23); this.chkMousePlugin.TabIndex = 4; this.chkMousePlugin.Text = "Enable Mouse Plugin (Mouse Automation)"; this.chkMousePlugin.UseVisualStyleBackColor = true; + this.chkMousePlugin.CheckedChanged += new System.EventHandler(this.chkPluginStatus_CheckedChanged); // // chkKeyboardPlugin // this.chkKeyboardPlugin.AutoSize = true; - this.chkKeyboardPlugin.Font = new System.Drawing.Font("Comic Sans MS", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.chkKeyboardPlugin.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); this.chkKeyboardPlugin.Location = new System.Drawing.Point(17, 113); this.chkKeyboardPlugin.Name = "chkKeyboardPlugin"; - this.chkKeyboardPlugin.Size = new System.Drawing.Size(328, 23); + this.chkKeyboardPlugin.Size = new System.Drawing.Size(319, 23); this.chkKeyboardPlugin.TabIndex = 3; this.chkKeyboardPlugin.Text = "Enable Keyboard Plugin (Keyboard Automation)"; this.chkKeyboardPlugin.UseVisualStyleBackColor = true; + this.chkKeyboardPlugin.CheckedChanged += new System.EventHandler(this.chkPluginStatus_CheckedChanged); // // chkScreenCapturePlugin // this.chkScreenCapturePlugin.AutoSize = true; - this.chkScreenCapturePlugin.Font = new System.Drawing.Font("Comic Sans MS", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.chkScreenCapturePlugin.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); this.chkScreenCapturePlugin.Location = new System.Drawing.Point(17, 84); this.chkScreenCapturePlugin.Name = "chkScreenCapturePlugin"; - this.chkScreenCapturePlugin.Size = new System.Drawing.Size(313, 23); + this.chkScreenCapturePlugin.Size = new System.Drawing.Size(292, 23); this.chkScreenCapturePlugin.TabIndex = 2; this.chkScreenCapturePlugin.Text = "Enable Screen Capture Plugin (Screenshots)"; this.chkScreenCapturePlugin.UseVisualStyleBackColor = true; + this.chkScreenCapturePlugin.CheckedChanged += new System.EventHandler(this.chkPluginStatus_CheckedChanged); // // chkPowerShellPlugin // this.chkPowerShellPlugin.AutoSize = true; - this.chkPowerShellPlugin.Font = new System.Drawing.Font("Comic Sans MS", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.chkPowerShellPlugin.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); this.chkPowerShellPlugin.Location = new System.Drawing.Point(17, 55); this.chkPowerShellPlugin.Name = "chkPowerShellPlugin"; - this.chkPowerShellPlugin.Size = new System.Drawing.Size(343, 23); + this.chkPowerShellPlugin.Size = new System.Drawing.Size(330, 23); this.chkPowerShellPlugin.TabIndex = 1; this.chkPowerShellPlugin.Text = "Enable PowerShell Plugin (PowerShell Commands)"; this.chkPowerShellPlugin.UseVisualStyleBackColor = true; + this.chkPowerShellPlugin.CheckedChanged += new System.EventHandler(this.chkPluginStatus_CheckedChanged); // // chkCMDPlugin // this.chkCMDPlugin.AutoSize = true; - this.chkCMDPlugin.Font = new System.Drawing.Font("Comic Sans MS", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.chkCMDPlugin.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); this.chkCMDPlugin.Location = new System.Drawing.Point(17, 26); this.chkCMDPlugin.Name = "chkCMDPlugin"; - this.chkCMDPlugin.Size = new System.Drawing.Size(268, 23); + this.chkCMDPlugin.Size = new System.Drawing.Size(272, 23); this.chkCMDPlugin.TabIndex = 0; this.chkCMDPlugin.Text = "Enable CMD Plugin (Command Prompt)"; this.chkCMDPlugin.UseVisualStyleBackColor = true; + this.chkCMDPlugin.CheckedChanged += new System.EventHandler(this.chkPluginStatus_CheckedChanged); // // chkWindowSelectionPlugin // this.chkWindowSelectionPlugin.AutoSize = true; - this.chkWindowSelectionPlugin.Font = new System.Drawing.Font("Comic Sans MS", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.chkWindowSelectionPlugin.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); this.chkWindowSelectionPlugin.Location = new System.Drawing.Point(17, 171); this.chkWindowSelectionPlugin.Name = "chkWindowSelectionPlugin"; - this.chkWindowSelectionPlugin.Size = new System.Drawing.Size(351, 23); + this.chkWindowSelectionPlugin.Size = new System.Drawing.Size(337, 23); this.chkWindowSelectionPlugin.TabIndex = 5; this.chkWindowSelectionPlugin.Text = "Enable Window Selection Plugin (Window Handles)"; this.chkWindowSelectionPlugin.UseVisualStyleBackColor = true; + this.chkWindowSelectionPlugin.CheckedChanged += new System.EventHandler(this.chkPluginStatus_CheckedChanged); + // + // chkPlaywrightPlugin + // + this.chkPlaywrightPlugin.AutoSize = true; + this.chkPlaywrightPlugin.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.chkPlaywrightPlugin.Location = new System.Drawing.Point(17, 200); + this.chkPlaywrightPlugin.Name = "chkPlaywrightPlugin"; + this.chkPlaywrightPlugin.Size = new System.Drawing.Size(196, 23); + this.chkPlaywrightPlugin.TabIndex = 6; + this.chkPlaywrightPlugin.Text = "Playwright Browser Plugin"; + this.chkPlaywrightPlugin.UseVisualStyleBackColor = true; + this.chkPlaywrightPlugin.CheckedChanged += new System.EventHandler(this.chkPluginStatus_CheckedChanged); // // enablePluginLoggingCheckBox // this.enablePluginLoggingCheckBox.AutoSize = true; - this.enablePluginLoggingCheckBox.Font = new System.Drawing.Font("Comic Sans MS", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); - this.enablePluginLoggingCheckBox.Location = new System.Drawing.Point(12, 218); + this.enablePluginLoggingCheckBox.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.enablePluginLoggingCheckBox.Location = new System.Drawing.Point(23, 252); this.enablePluginLoggingCheckBox.Name = "enablePluginLoggingCheckBox"; - this.enablePluginLoggingCheckBox.Size = new System.Drawing.Size(140, 23); - this.enablePluginLoggingCheckBox.TabIndex = 6; + this.enablePluginLoggingCheckBox.Size = new System.Drawing.Size(135, 23); + this.enablePluginLoggingCheckBox.TabIndex = 7; this.enablePluginLoggingCheckBox.Text = "Log Plugin Usage"; this.enablePluginLoggingCheckBox.UseVisualStyleBackColor = true; - // + // + // chkEnableRemoteControl + // + this.chkEnableRemoteControl.AutoSize = true; + this.chkEnableRemoteControl.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.chkEnableRemoteControl.Location = new System.Drawing.Point(23, 281); + this.chkEnableRemoteControl.Name = "chkEnableRemoteControl"; + this.chkEnableRemoteControl.Size = new System.Drawing.Size(171, 23); + this.chkEnableRemoteControl.TabIndex = 8; + this.chkEnableRemoteControl.Text = "Enable Remote Control"; + this.chkEnableRemoteControl.UseVisualStyleBackColor = true; + // + // lblRemotePort + // + this.lblRemotePort.AutoSize = true; + this.lblRemotePort.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.lblRemotePort.Location = new System.Drawing.Point(40, 310); + this.lblRemotePort.Name = "lblRemotePort"; + this.lblRemotePort.Size = new System.Drawing.Size(82, 19); + this.lblRemotePort.TabIndex = 9; + this.lblRemotePort.Text = "Listen Port:"; + // + // numRemotePort + // + this.numRemotePort.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.numRemotePort.Location = new System.Drawing.Point(128, 308); + this.numRemotePort.Maximum = new decimal(new int[] { + 65535, + 0, + 0, + 0}); + this.numRemotePort.Minimum = new decimal(new int[] { + 1, + 0, + 0, + 0}); + this.numRemotePort.Name = "numRemotePort"; + this.numRemotePort.Size = new System.Drawing.Size(80, 25); + this.numRemotePort.TabIndex = 10; + this.numRemotePort.Value = new decimal(new int[] { + 8085, + 0, + 0, + 0}); + // // groupBoxSettings - // + // + this.groupBoxSettings.Controls.Add(this.indicatorAutoInvoke); + this.groupBoxSettings.Controls.Add(this.indicatorMultiAgent); + this.groupBoxSettings.Controls.Add(this.chkMultiAgentMode); this.groupBoxSettings.Controls.Add(this.chkAutoInvoke); this.groupBoxSettings.Controls.Add(this.chkRetainChatHistory); this.groupBoxSettings.Controls.Add(this.numTemperature); this.groupBoxSettings.Controls.Add(this.lblTemperature); - this.groupBoxSettings.Font = new System.Drawing.Font("Comic Sans MS", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); - this.groupBoxSettings.Location = new System.Drawing.Point(12, 253); + this.groupBoxSettings.Controls.Add(this.chkDynamicToolPrompts); + this.groupBoxSettings.Font = new System.Drawing.Font("Segoe UI", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.groupBoxSettings.Location = new System.Drawing.Point(6, 6); this.groupBoxSettings.Name = "groupBoxSettings"; - this.groupBoxSettings.Size = new System.Drawing.Size(436, 120); + this.groupBoxSettings.Size = new System.Drawing.Size(436, 230); this.groupBoxSettings.TabIndex = 1; this.groupBoxSettings.TabStop = false; this.groupBoxSettings.Text = "AI Settings"; // + // indicatorAutoInvoke + // + this.indicatorAutoInvoke.BackColor = System.Drawing.Color.Transparent; + this.indicatorAutoInvoke.Font = new System.Drawing.Font("Segoe UI Symbol", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.indicatorAutoInvoke.Location = new System.Drawing.Point(390, 59); + this.indicatorAutoInvoke.Name = "indicatorAutoInvoke"; + this.indicatorAutoInvoke.Size = new System.Drawing.Size(24, 24); + this.indicatorAutoInvoke.SizeMode = System.Windows.Forms.PictureBoxSizeMode.CenterImage; + this.indicatorAutoInvoke.TabIndex = 12; + this.indicatorAutoInvoke.TabStop = false; + // + // indicatorMultiAgent + // + this.indicatorMultiAgent.BackColor = System.Drawing.Color.Transparent; + this.indicatorMultiAgent.Font = new System.Drawing.Font("Segoe UI Symbol", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.indicatorMultiAgent.Location = new System.Drawing.Point(390, 117); + this.indicatorMultiAgent.Name = "indicatorMultiAgent"; + this.indicatorMultiAgent.Size = new System.Drawing.Size(24, 24); + this.indicatorMultiAgent.SizeMode = System.Windows.Forms.PictureBoxSizeMode.CenterImage; + this.indicatorMultiAgent.TabIndex = 5; + this.indicatorMultiAgent.TabStop = false; + // + // chkMultiAgentMode + // + this.chkMultiAgentMode.AutoSize = true; + this.chkMultiAgentMode.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.chkMultiAgentMode.Location = new System.Drawing.Point(17, 117); + this.chkMultiAgentMode.Name = "chkMultiAgentMode"; + this.chkMultiAgentMode.Size = new System.Drawing.Size(187, 23); + this.chkMultiAgentMode.TabIndex = 4; + this.chkMultiAgentMode.Text = "Enable Multi-Agent Mode"; + this.chkMultiAgentMode.UseVisualStyleBackColor = true; + this.chkMultiAgentMode.CheckedChanged += new System.EventHandler(this.chkMultiAgentMode_CheckedChanged); + // // chkAutoInvoke // this.chkAutoInvoke.AutoSize = true; - this.chkAutoInvoke.Font = new System.Drawing.Font("Comic Sans MS", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.chkAutoInvoke.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); this.chkAutoInvoke.Location = new System.Drawing.Point(17, 59); this.chkAutoInvoke.Name = "chkAutoInvoke"; - this.chkAutoInvoke.Size = new System.Drawing.Size(415, 23); + this.chkAutoInvoke.Size = new System.Drawing.Size(385, 23); this.chkAutoInvoke.TabIndex = 2; this.chkAutoInvoke.Text = "Auto-Invoke Functions (Let AI execute tools automatically)"; this.chkAutoInvoke.UseVisualStyleBackColor = true; + this.chkAutoInvoke.CheckedChanged += new System.EventHandler(this.chkPluginStatus_CheckedChanged); // // chkRetainChatHistory // this.chkRetainChatHistory.AutoSize = true; - this.chkRetainChatHistory.Font = new System.Drawing.Font("Comic Sans MS", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.chkRetainChatHistory.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); this.chkRetainChatHistory.Location = new System.Drawing.Point(17, 88); this.chkRetainChatHistory.Name = "chkRetainChatHistory"; - this.chkRetainChatHistory.Size = new System.Drawing.Size(419, 23); + this.chkRetainChatHistory.Size = new System.Drawing.Size(392, 23); this.chkRetainChatHistory.TabIndex = 3; this.chkRetainChatHistory.Text = "Retain Chat History (Keep chat messages between sessions)"; this.chkRetainChatHistory.UseVisualStyleBackColor = true; @@ -183,7 +469,7 @@ private void InitializeComponent() // numTemperature // this.numTemperature.DecimalPlaces = 1; - this.numTemperature.Font = new System.Drawing.Font("Comic Sans MS", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.numTemperature.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); this.numTemperature.Increment = new decimal(new int[] { 1, 0, @@ -196,7 +482,7 @@ private void InitializeComponent() 0, 0}); this.numTemperature.Name = "numTemperature"; - this.numTemperature.Size = new System.Drawing.Size(60, 26); + this.numTemperature.Size = new System.Drawing.Size(60, 25); this.numTemperature.TabIndex = 1; this.numTemperature.Value = new decimal(new int[] { 2, @@ -207,17 +493,28 @@ private void InitializeComponent() // lblTemperature // this.lblTemperature.AutoSize = true; - this.lblTemperature.Font = new System.Drawing.Font("Comic Sans MS", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.lblTemperature.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); this.lblTemperature.Location = new System.Drawing.Point(14, 29); this.lblTemperature.Name = "lblTemperature"; - this.lblTemperature.Size = new System.Drawing.Size(192, 19); + this.lblTemperature.Size = new System.Drawing.Size(179, 19); this.lblTemperature.TabIndex = 0; this.lblTemperature.Text = "Temperature (Randomness):"; // + // chkDynamicToolPrompts + // + this.chkDynamicToolPrompts.AutoSize = true; + this.chkDynamicToolPrompts.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.chkDynamicToolPrompts.Location = new System.Drawing.Point(17, 200); + this.chkDynamicToolPrompts.Name = "chkDynamicToolPrompts"; + this.chkDynamicToolPrompts.Size = new System.Drawing.Size(413, 23); + this.chkDynamicToolPrompts.TabIndex = 7; + this.chkDynamicToolPrompts.Text = "Dynamically update system prompt with tool descriptions"; + this.chkDynamicToolPrompts.UseVisualStyleBackColor = true; + // // saveButton // - this.saveButton.Font = new System.Drawing.Font("Comic Sans MS", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); - this.saveButton.Location = new System.Drawing.Point(236, 531); + this.saveButton.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.saveButton.Location = new System.Drawing.Point(452, 450); this.saveButton.Name = "saveButton"; this.saveButton.Size = new System.Drawing.Size(98, 30); this.saveButton.TabIndex = 2; @@ -227,8 +524,8 @@ private void InitializeComponent() // // cancelButton // - this.cancelButton.Font = new System.Drawing.Font("Comic Sans MS", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); - this.cancelButton.Location = new System.Drawing.Point(340, 531); + this.cancelButton.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.cancelButton.Location = new System.Drawing.Point(556, 450); this.cancelButton.Name = "cancelButton"; this.cancelButton.Size = new System.Drawing.Size(98, 30); this.cancelButton.TabIndex = 3; @@ -236,72 +533,808 @@ private void InitializeComponent() this.cancelButton.UseVisualStyleBackColor = true; this.cancelButton.Click += new System.EventHandler(this.cancelButton_Click_1); // - // groupBoxSystemPrompt + // groupBoxSpeechRecognition + // + this.groupBoxSpeechRecognition.Controls.Add(this.indicatorSpeech); + this.groupBoxSpeechRecognition.Controls.Add(this.comboSpeechLanguage); + this.groupBoxSpeechRecognition.Controls.Add(this.lblSpeechLanguage); + this.groupBoxSpeechRecognition.Controls.Add(this.chkEnableSpeechRecognition); + this.groupBoxSpeechRecognition.Font = new System.Drawing.Font("Segoe UI", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.groupBoxSpeechRecognition.Location = new System.Drawing.Point(6, 6); + this.groupBoxSpeechRecognition.Name = "groupBoxSpeechRecognition"; + this.groupBoxSpeechRecognition.Size = new System.Drawing.Size(436, 92); + this.groupBoxSpeechRecognition.TabIndex = 5; + this.groupBoxSpeechRecognition.TabStop = false; + this.groupBoxSpeechRecognition.Text = "Speech Recognition"; + // + // indicatorSpeech + // + this.indicatorSpeech.BackColor = System.Drawing.Color.Transparent; + this.indicatorSpeech.Font = new System.Drawing.Font("Segoe UI Symbol", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.indicatorSpeech.Location = new System.Drawing.Point(390, 29); + this.indicatorSpeech.Name = "indicatorSpeech"; + this.indicatorSpeech.Size = new System.Drawing.Size(24, 24); + this.indicatorSpeech.SizeMode = System.Windows.Forms.PictureBoxSizeMode.CenterImage; + this.indicatorSpeech.TabIndex = 13; + this.indicatorSpeech.TabStop = false; + // + // comboSpeechLanguage + // + this.comboSpeechLanguage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.comboSpeechLanguage.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.comboSpeechLanguage.FormattingEnabled = true; + this.comboSpeechLanguage.Items.AddRange(new object[] { + "en-US", + "en-GB", + "en-AU", + "fr-FR", + "es-ES", + "de-DE", + "it-IT", + "ja-JP", + "zh-CN", + "ru-RU"}); + this.comboSpeechLanguage.Location = new System.Drawing.Point(180, 56); + this.comboSpeechLanguage.Name = "comboSpeechLanguage"; + this.comboSpeechLanguage.Size = new System.Drawing.Size(239, 25); + this.comboSpeechLanguage.TabIndex = 2; + // + // lblSpeechLanguage + // + this.lblSpeechLanguage.AutoSize = true; + this.lblSpeechLanguage.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.lblSpeechLanguage.Location = new System.Drawing.Point(14, 59); + this.lblSpeechLanguage.Name = "lblSpeechLanguage"; + this.lblSpeechLanguage.Size = new System.Drawing.Size(148, 19); + this.lblSpeechLanguage.TabIndex = 1; + this.lblSpeechLanguage.Text = "Recognition Language:"; + // + // chkEnableSpeechRecognition + // + this.chkEnableSpeechRecognition.AutoSize = true; + this.chkEnableSpeechRecognition.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.chkEnableSpeechRecognition.Location = new System.Drawing.Point(17, 29); + this.chkEnableSpeechRecognition.Name = "chkEnableSpeechRecognition"; + this.chkEnableSpeechRecognition.Size = new System.Drawing.Size(191, 23); + this.chkEnableSpeechRecognition.TabIndex = 0; + this.chkEnableSpeechRecognition.Text = "Enable Speech Recognition"; + this.chkEnableSpeechRecognition.UseVisualStyleBackColor = true; + this.chkEnableSpeechRecognition.CheckedChanged += new System.EventHandler(this.chkPluginStatus_CheckedChanged); + // + // groupBoxVoiceCommands + // + this.groupBoxVoiceCommands.Controls.Add(this.indicatorVoiceCmd); + this.groupBoxVoiceCommands.Controls.Add(this.txtVoiceCommandPhrase); + this.groupBoxVoiceCommands.Controls.Add(this.lblVoiceCommandPhrase); + this.groupBoxVoiceCommands.Controls.Add(this.chkEnableVoiceCommands); + this.groupBoxVoiceCommands.Font = new System.Drawing.Font("Segoe UI", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.groupBoxVoiceCommands.Location = new System.Drawing.Point(6, 104); + this.groupBoxVoiceCommands.Name = "groupBoxVoiceCommands"; + this.groupBoxVoiceCommands.Size = new System.Drawing.Size(436, 88); + this.groupBoxVoiceCommands.TabIndex = 6; + this.groupBoxVoiceCommands.TabStop = false; + this.groupBoxVoiceCommands.Text = "Voice Commands"; + // + // indicatorVoiceCmd + // + this.indicatorVoiceCmd.BackColor = System.Drawing.Color.Transparent; + this.indicatorVoiceCmd.Font = new System.Drawing.Font("Segoe UI Symbol", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.indicatorVoiceCmd.Location = new System.Drawing.Point(390, 29); + this.indicatorVoiceCmd.Name = "indicatorVoiceCmd"; + this.indicatorVoiceCmd.Size = new System.Drawing.Size(24, 24); + this.indicatorVoiceCmd.SizeMode = System.Windows.Forms.PictureBoxSizeMode.CenterImage; + this.indicatorVoiceCmd.TabIndex = 14; + this.indicatorVoiceCmd.TabStop = false; + // + // txtVoiceCommandPhrase + // + this.txtVoiceCommandPhrase.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.txtVoiceCommandPhrase.Location = new System.Drawing.Point(180, 56); + this.txtVoiceCommandPhrase.Name = "txtVoiceCommandPhrase"; + this.txtVoiceCommandPhrase.Size = new System.Drawing.Size(239, 25); + this.txtVoiceCommandPhrase.TabIndex = 2; + // + // lblVoiceCommandPhrase + // + this.lblVoiceCommandPhrase.AutoSize = true; + this.lblVoiceCommandPhrase.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.lblVoiceCommandPhrase.Location = new System.Drawing.Point(14, 59); + this.lblVoiceCommandPhrase.Name = "lblVoiceCommandPhrase"; + this.lblVoiceCommandPhrase.Size = new System.Drawing.Size(157, 19); + this.lblVoiceCommandPhrase.TabIndex = 1; + this.lblVoiceCommandPhrase.Text = "Voice Command Phrase:"; + // + // chkEnableVoiceCommands + // + this.chkEnableVoiceCommands.AutoSize = true; + this.chkEnableVoiceCommands.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.chkEnableVoiceCommands.Location = new System.Drawing.Point(17, 29); + this.chkEnableVoiceCommands.Name = "chkEnableVoiceCommands"; + this.chkEnableVoiceCommands.Size = new System.Drawing.Size(178, 23); + this.chkEnableVoiceCommands.TabIndex = 0; + this.chkEnableVoiceCommands.Text = "Enable Voice Commands"; + this.chkEnableVoiceCommands.UseVisualStyleBackColor = true; + this.chkEnableVoiceCommands.CheckedChanged += new System.EventHandler(this.chkPluginStatus_CheckedChanged); + // + // tabControlMain + // + this.tabControlMain.Controls.Add(this.tabPlugins); + this.tabControlMain.Controls.Add(this.tabAISettings); + this.tabControlMain.Controls.Add(this.tabVoice); + this.tabControlMain.Controls.Add(this.tabCoordinator); + this.tabControlMain.Controls.Add(this.tabPlanner); + this.tabControlMain.Controls.Add(this.tabActioner); + this.tabControlMain.Controls.Add(this.tabAppearance); + this.tabControlMain.Controls.Add(this.tabProfiles); + this.tabControlMain.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.tabControlMain.Location = new System.Drawing.Point(12, 45); + this.tabControlMain.Name = "tabControlMain"; + this.tabControlMain.SelectedIndex = 0; + this.tabControlMain.Size = new System.Drawing.Size(642, 393); + this.tabControlMain.TabIndex = 8; + // + // tabPlugins + // + this.tabPlugins.Controls.Add(this.groupBoxPlugins); + this.tabPlugins.Controls.Add(this.enablePluginLoggingCheckBox); + this.tabPlugins.Controls.Add(this.chkEnableRemoteControl); + this.tabPlugins.Controls.Add(this.lblRemotePort); + this.tabPlugins.Controls.Add(this.numRemotePort); + this.tabPlugins.Location = new System.Drawing.Point(4, 26); + this.tabPlugins.Name = "tabPlugins"; + this.tabPlugins.Padding = new System.Windows.Forms.Padding(3); + this.tabPlugins.Size = new System.Drawing.Size(634, 363); + this.tabPlugins.TabIndex = 0; + this.tabPlugins.Text = "Plugins"; + this.tabPlugins.UseVisualStyleBackColor = true; + // + // tabAISettings + // + this.tabAISettings.Controls.Add(this.groupBoxSettings); + this.tabAISettings.Location = new System.Drawing.Point(4, 26); + this.tabAISettings.Name = "tabAISettings"; + this.tabAISettings.Padding = new System.Windows.Forms.Padding(3); + this.tabAISettings.Size = new System.Drawing.Size(634, 363); + this.tabAISettings.TabIndex = 1; + this.tabAISettings.Text = "AI Settings"; + this.tabAISettings.UseVisualStyleBackColor = true; + // + // tabVoice + // + this.tabVoice.Controls.Add(this.groupBoxSpeechRecognition); + this.tabVoice.Controls.Add(this.groupBoxVoiceCommands); + this.tabVoice.Location = new System.Drawing.Point(4, 26); + this.tabVoice.Name = "tabVoice"; + this.tabVoice.Size = new System.Drawing.Size(634, 363); + this.tabVoice.TabIndex = 2; + this.tabVoice.Text = "Voice"; + this.tabVoice.UseVisualStyleBackColor = true; + // + // tabCoordinator + // + this.tabCoordinator.Controls.Add(this.grpCoordinatorConfig); + this.tabCoordinator.Location = new System.Drawing.Point(4, 26); + this.tabCoordinator.Name = "tabCoordinator"; + this.tabCoordinator.Size = new System.Drawing.Size(634, 363); + this.tabCoordinator.TabIndex = 3; + this.tabCoordinator.Text = "Coordinator"; + this.tabCoordinator.UseVisualStyleBackColor = true; + // + // grpCoordinatorConfig + // + this.grpCoordinatorConfig.Controls.Add(this.lblCoordinatorPrompt); + this.grpCoordinatorConfig.Controls.Add(this.txtCoordinatorSystemPrompt); + this.grpCoordinatorConfig.Controls.Add(this.chkUseCustomCoordinatorConfig); + this.grpCoordinatorConfig.Controls.Add(this.comboCoordinatorConfig); + this.grpCoordinatorConfig.Controls.Add(this.btnConfigureCoordinator); + this.grpCoordinatorConfig.Controls.Add(this.btnResetCoordinator); + this.grpCoordinatorConfig.Font = new System.Drawing.Font("Segoe UI", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.grpCoordinatorConfig.Location = new System.Drawing.Point(6, 6); + this.grpCoordinatorConfig.Name = "grpCoordinatorConfig"; + this.grpCoordinatorConfig.Size = new System.Drawing.Size(577, 351); + this.grpCoordinatorConfig.TabIndex = 15; + this.grpCoordinatorConfig.TabStop = false; + this.grpCoordinatorConfig.Text = "Coordinator Configuration"; + // + // lblCoordinatorPrompt + // + this.lblCoordinatorPrompt.AutoSize = true; + this.lblCoordinatorPrompt.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.lblCoordinatorPrompt.Location = new System.Drawing.Point(8, 24); + this.lblCoordinatorPrompt.Name = "lblCoordinatorPrompt"; + this.lblCoordinatorPrompt.Size = new System.Drawing.Size(177, 19); + this.lblCoordinatorPrompt.TabIndex = 16; + this.lblCoordinatorPrompt.Text = "Coordinator System Prompt:"; + // + // txtCoordinatorSystemPrompt + // + this.txtCoordinatorSystemPrompt.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.txtCoordinatorSystemPrompt.Location = new System.Drawing.Point(8, 46); + this.txtCoordinatorSystemPrompt.Multiline = true; + this.txtCoordinatorSystemPrompt.Name = "txtCoordinatorSystemPrompt"; + this.txtCoordinatorSystemPrompt.ScrollBars = System.Windows.Forms.ScrollBars.Vertical; + this.txtCoordinatorSystemPrompt.Size = new System.Drawing.Size(555, 200); + this.txtCoordinatorSystemPrompt.TabIndex = 17; + // + // chkUseCustomCoordinatorConfig + // + this.chkUseCustomCoordinatorConfig.AutoSize = true; + this.chkUseCustomCoordinatorConfig.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.chkUseCustomCoordinatorConfig.Location = new System.Drawing.Point(8, 284); + this.chkUseCustomCoordinatorConfig.Name = "chkUseCustomCoordinatorConfig"; + this.chkUseCustomCoordinatorConfig.Size = new System.Drawing.Size(261, 23); + this.chkUseCustomCoordinatorConfig.TabIndex = 20; + this.chkUseCustomCoordinatorConfig.Text = "Use Custom Coordinator Model Config"; + this.chkUseCustomCoordinatorConfig.UseVisualStyleBackColor = true; + // + // comboCoordinatorConfig + // + this.comboCoordinatorConfig.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.comboCoordinatorConfig.Enabled = false; + this.comboCoordinatorConfig.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.comboCoordinatorConfig.FormattingEnabled = true; + this.comboCoordinatorConfig.Location = new System.Drawing.Point(275, 284); + this.comboCoordinatorConfig.Name = "comboCoordinatorConfig"; + this.comboCoordinatorConfig.Size = new System.Drawing.Size(181, 25); + this.comboCoordinatorConfig.TabIndex = 21; // - this.groupBoxSystemPrompt.Controls.Add(this.txtSystemPrompt); - this.groupBoxSystemPrompt.Font = new System.Drawing.Font("Comic Sans MS", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); - this.groupBoxSystemPrompt.Location = new System.Drawing.Point(12, 379); - this.groupBoxSystemPrompt.Name = "groupBoxSystemPrompt"; - this.groupBoxSystemPrompt.Size = new System.Drawing.Size(436, 146); - this.groupBoxSystemPrompt.TabIndex = 4; - this.groupBoxSystemPrompt.TabStop = false; - this.groupBoxSystemPrompt.Text = "System Prompt"; + // btnConfigureCoordinator // - // txtSystemPrompt + this.btnConfigureCoordinator.Enabled = false; + this.btnConfigureCoordinator.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.btnConfigureCoordinator.Location = new System.Drawing.Point(463, 284); + this.btnConfigureCoordinator.Name = "btnConfigureCoordinator"; + this.btnConfigureCoordinator.Size = new System.Drawing.Size(100, 30); + this.btnConfigureCoordinator.TabIndex = 22; + this.btnConfigureCoordinator.Text = "Configure"; + this.btnConfigureCoordinator.UseVisualStyleBackColor = true; // - this.txtSystemPrompt.Font = new System.Drawing.Font("Comic Sans MS", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); - this.txtSystemPrompt.Location = new System.Drawing.Point(17, 29); - this.txtSystemPrompt.Multiline = true; - this.txtSystemPrompt.Name = "txtSystemPrompt"; - this.txtSystemPrompt.Size = new System.Drawing.Size(402, 100); - this.txtSystemPrompt.TabIndex = 0; + // btnResetCoordinator + // + this.btnResetCoordinator.Font = new System.Drawing.Font("Segoe UI", 9F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.btnResetCoordinator.Location = new System.Drawing.Point(8, 252); + this.btnResetCoordinator.Name = "btnResetCoordinator"; + this.btnResetCoordinator.Size = new System.Drawing.Size(120, 26); + this.btnResetCoordinator.TabIndex = 28; + this.btnResetCoordinator.Text = "🔄 Reset to Default"; + this.btnResetCoordinator.UseVisualStyleBackColor = true; + this.btnResetCoordinator.Click += new System.EventHandler(this.btnResetCoordinator_Click); + // + // tabPlanner + // + this.tabPlanner.Controls.Add(this.grpPlannerConfig); + this.tabPlanner.Location = new System.Drawing.Point(4, 26); + this.tabPlanner.Name = "tabPlanner"; + this.tabPlanner.Size = new System.Drawing.Size(634, 363); + this.tabPlanner.TabIndex = 6; + this.tabPlanner.Text = "Planner"; + this.tabPlanner.UseVisualStyleBackColor = true; + // + // grpPlannerConfig + // + this.grpPlannerConfig.Controls.Add(this.lblPlannerPrompt); + this.grpPlannerConfig.Controls.Add(this.txtPlannerSystemPrompt); + this.grpPlannerConfig.Controls.Add(this.chkUseCustomPlannerConfig); + this.grpPlannerConfig.Controls.Add(this.comboPlannerConfig); + this.grpPlannerConfig.Controls.Add(this.btnConfigurePlanner); + this.grpPlannerConfig.Controls.Add(this.btnResetPlanner); + this.grpPlannerConfig.Font = new System.Drawing.Font("Segoe UI", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.grpPlannerConfig.Location = new System.Drawing.Point(6, 6); + this.grpPlannerConfig.Name = "grpPlannerConfig"; + this.grpPlannerConfig.Size = new System.Drawing.Size(577, 351); + this.grpPlannerConfig.TabIndex = 15; + this.grpPlannerConfig.TabStop = false; + this.grpPlannerConfig.Text = "Planner Configuration"; + // + // lblPlannerPrompt + // + this.lblPlannerPrompt.AutoSize = true; + this.lblPlannerPrompt.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.lblPlannerPrompt.Location = new System.Drawing.Point(8, 24); + this.lblPlannerPrompt.Name = "lblPlannerPrompt"; + this.lblPlannerPrompt.Size = new System.Drawing.Size(156, 19); + this.lblPlannerPrompt.TabIndex = 16; + this.lblPlannerPrompt.Text = "Planner System Prompt:"; + // + // txtPlannerSystemPrompt + // + this.txtPlannerSystemPrompt.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.txtPlannerSystemPrompt.Location = new System.Drawing.Point(8, 46); + this.txtPlannerSystemPrompt.Multiline = true; + this.txtPlannerSystemPrompt.Name = "txtPlannerSystemPrompt"; + this.txtPlannerSystemPrompt.ScrollBars = System.Windows.Forms.ScrollBars.Vertical; + this.txtPlannerSystemPrompt.Size = new System.Drawing.Size(555, 200); + this.txtPlannerSystemPrompt.TabIndex = 17; + // + // chkUseCustomPlannerConfig + // + this.chkUseCustomPlannerConfig.AutoSize = true; + this.chkUseCustomPlannerConfig.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.chkUseCustomPlannerConfig.Location = new System.Drawing.Point(8, 284); + this.chkUseCustomPlannerConfig.Name = "chkUseCustomPlannerConfig"; + this.chkUseCustomPlannerConfig.Size = new System.Drawing.Size(240, 23); + this.chkUseCustomPlannerConfig.TabIndex = 20; + this.chkUseCustomPlannerConfig.Text = "Use Custom Planner Model Config"; + this.chkUseCustomPlannerConfig.UseVisualStyleBackColor = true; + this.chkUseCustomPlannerConfig.CheckedChanged += new System.EventHandler(this.chkUseCustomPlannerConfig_CheckedChanged); + // + // comboPlannerConfig + // + this.comboPlannerConfig.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.comboPlannerConfig.Enabled = false; + this.comboPlannerConfig.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.comboPlannerConfig.FormattingEnabled = true; + this.comboPlannerConfig.Location = new System.Drawing.Point(275, 284); + this.comboPlannerConfig.Name = "comboPlannerConfig"; + this.comboPlannerConfig.Size = new System.Drawing.Size(181, 25); + this.comboPlannerConfig.TabIndex = 21; + // + // btnConfigurePlanner + // + this.btnConfigurePlanner.Enabled = false; + this.btnConfigurePlanner.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.btnConfigurePlanner.Location = new System.Drawing.Point(463, 284); + this.btnConfigurePlanner.Name = "btnConfigurePlanner"; + this.btnConfigurePlanner.Size = new System.Drawing.Size(100, 30); + this.btnConfigurePlanner.TabIndex = 22; + this.btnConfigurePlanner.Text = "Configure"; + this.btnConfigurePlanner.UseVisualStyleBackColor = true; + this.btnConfigurePlanner.Click += new System.EventHandler(this.btnConfigurePlanner_Click); + // + // btnResetPlanner + // + this.btnResetPlanner.Font = new System.Drawing.Font("Segoe UI", 9F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.btnResetPlanner.Location = new System.Drawing.Point(8, 252); + this.btnResetPlanner.Name = "btnResetPlanner"; + this.btnResetPlanner.Size = new System.Drawing.Size(120, 26); + this.btnResetPlanner.TabIndex = 27; + this.btnResetPlanner.Text = "🔄 Reset to Default"; + this.btnResetPlanner.UseVisualStyleBackColor = true; + this.btnResetPlanner.Click += new System.EventHandler(this.btnResetPlanner_Click); + // + // tabActioner + // + this.tabActioner.Controls.Add(this.grpActionerConfig); + this.tabActioner.Location = new System.Drawing.Point(4, 26); + this.tabActioner.Name = "tabActioner"; + this.tabActioner.Size = new System.Drawing.Size(634, 363); + this.tabActioner.TabIndex = 7; + this.tabActioner.Text = "Actioner"; + this.tabActioner.UseVisualStyleBackColor = true; + // + // grpActionerConfig + // + this.grpActionerConfig.Controls.Add(this.lblActionerPrompt); + this.grpActionerConfig.Controls.Add(this.txtActionerSystemPrompt); + this.grpActionerConfig.Controls.Add(this.chkUseCustomExecutorConfig); + this.grpActionerConfig.Controls.Add(this.comboActionerConfig); + this.grpActionerConfig.Controls.Add(this.btnConfigureActioner); + this.grpActionerConfig.Controls.Add(this.btnResetActioner); + this.grpActionerConfig.Font = new System.Drawing.Font("Segoe UI", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.grpActionerConfig.Location = new System.Drawing.Point(6, 6); + this.grpActionerConfig.Name = "grpActionerConfig"; + this.grpActionerConfig.Size = new System.Drawing.Size(577, 351); + this.grpActionerConfig.TabIndex = 15; + this.grpActionerConfig.TabStop = false; + this.grpActionerConfig.Text = "Actioner Configuration"; + // + // lblActionerPrompt + // + this.lblActionerPrompt.AutoSize = true; + this.lblActionerPrompt.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.lblActionerPrompt.Location = new System.Drawing.Point(8, 24); + this.lblActionerPrompt.Name = "lblActionerPrompt"; + this.lblActionerPrompt.Size = new System.Drawing.Size(477, 19); + this.lblActionerPrompt.TabIndex = 18; + this.lblActionerPrompt.Text = "Actioner System Prompt (also used as system prompt in single agent mode):"; + // + // txtActionerSystemPrompt + // + this.txtActionerSystemPrompt.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.txtActionerSystemPrompt.Location = new System.Drawing.Point(8, 46); + this.txtActionerSystemPrompt.Multiline = true; + this.txtActionerSystemPrompt.Name = "txtActionerSystemPrompt"; + this.txtActionerSystemPrompt.ScrollBars = System.Windows.Forms.ScrollBars.Vertical; + this.txtActionerSystemPrompt.Size = new System.Drawing.Size(555, 200); + this.txtActionerSystemPrompt.TabIndex = 19; + // + // chkUseCustomExecutorConfig + // + this.chkUseCustomExecutorConfig.AutoSize = true; + this.chkUseCustomExecutorConfig.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.chkUseCustomExecutorConfig.Location = new System.Drawing.Point(8, 284); + this.chkUseCustomExecutorConfig.Name = "chkUseCustomExecutorConfig"; + this.chkUseCustomExecutorConfig.Size = new System.Drawing.Size(246, 23); + this.chkUseCustomExecutorConfig.TabIndex = 23; + this.chkUseCustomExecutorConfig.Text = "Use Custom Actioner Model Config"; + this.chkUseCustomExecutorConfig.UseVisualStyleBackColor = true; + this.chkUseCustomExecutorConfig.CheckedChanged += new System.EventHandler(this.chkUseCustomExecutorConfig_CheckedChanged); + // + // comboActionerConfig + // + this.comboActionerConfig.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.comboActionerConfig.Enabled = false; + this.comboActionerConfig.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.comboActionerConfig.FormattingEnabled = true; + this.comboActionerConfig.Location = new System.Drawing.Point(275, 284); + this.comboActionerConfig.Name = "comboActionerConfig"; + this.comboActionerConfig.Size = new System.Drawing.Size(181, 25); + this.comboActionerConfig.TabIndex = 24; + // + // btnConfigureActioner + // + this.btnConfigureActioner.Enabled = false; + this.btnConfigureActioner.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.btnConfigureActioner.Location = new System.Drawing.Point(463, 284); + this.btnConfigureActioner.Name = "btnConfigureActioner"; + this.btnConfigureActioner.Size = new System.Drawing.Size(100, 30); + this.btnConfigureActioner.TabIndex = 25; + this.btnConfigureActioner.Text = "Configure"; + this.btnConfigureActioner.UseVisualStyleBackColor = true; + this.btnConfigureActioner.Click += new System.EventHandler(this.btnConfigureActioner_Click); + // + // btnResetActioner + // + this.btnResetActioner.Font = new System.Drawing.Font("Segoe UI", 9F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.btnResetActioner.Location = new System.Drawing.Point(8, 252); + this.btnResetActioner.Name = "btnResetActioner"; + this.btnResetActioner.Size = new System.Drawing.Size(120, 26); + this.btnResetActioner.TabIndex = 26; + this.btnResetActioner.Text = "🔄 Reset to Default"; + this.btnResetActioner.UseVisualStyleBackColor = true; + this.btnResetActioner.Click += new System.EventHandler(this.btnResetActioner_Click); + // + // tabAppearance + // + this.tabAppearance.Controls.Add(this.groupBoxTheme); + this.tabAppearance.Location = new System.Drawing.Point(4, 26); + this.tabAppearance.Name = "tabAppearance"; + this.tabAppearance.Size = new System.Drawing.Size(634, 363); + this.tabAppearance.TabIndex = 4; + this.tabAppearance.Text = "Appearance"; + this.tabAppearance.UseVisualStyleBackColor = true; + // + // groupBoxTheme + // + this.groupBoxTheme.Controls.Add(this.labelTheme); + this.groupBoxTheme.Controls.Add(this.cbTheme); + this.groupBoxTheme.Font = new System.Drawing.Font("Segoe UI", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.groupBoxTheme.Location = new System.Drawing.Point(6, 6); + this.groupBoxTheme.Name = "groupBoxTheme"; + this.groupBoxTheme.Size = new System.Drawing.Size(436, 75); + this.groupBoxTheme.TabIndex = 0; + this.groupBoxTheme.TabStop = false; + this.groupBoxTheme.Text = "Theme Settings"; + // + // labelTheme + // + this.labelTheme.AutoSize = true; + this.labelTheme.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.labelTheme.Location = new System.Drawing.Point(14, 34); + this.labelTheme.Name = "labelTheme"; + this.labelTheme.Size = new System.Drawing.Size(104, 19); + this.labelTheme.TabIndex = 1; + this.labelTheme.Text = "Current Theme:"; + // + // cbTheme + // + this.cbTheme.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.cbTheme.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.cbTheme.FormattingEnabled = true; + this.cbTheme.Items.AddRange(new object[] { + "Light", + "Dark"}); + this.cbTheme.Location = new System.Drawing.Point(122, 31); + this.cbTheme.Name = "cbTheme"; + this.cbTheme.Size = new System.Drawing.Size(150, 25); + this.cbTheme.TabIndex = 0; + this.cbTheme.SelectedIndexChanged += new System.EventHandler(this.cbTheme_SelectedIndexChanged); + // + // tabProfiles + // + this.tabProfiles.Controls.Add(this.groupBoxProfiles); + this.tabProfiles.Location = new System.Drawing.Point(4, 26); + this.tabProfiles.Name = "tabProfiles"; + this.tabProfiles.Size = new System.Drawing.Size(634, 363); + this.tabProfiles.TabIndex = 5; + this.tabProfiles.Text = "Profiles"; + this.tabProfiles.UseVisualStyleBackColor = true; + // + // groupBoxProfiles + // + this.groupBoxProfiles.Controls.Add(this.btnDeleteProfile); + this.groupBoxProfiles.Controls.Add(this.btnLoadProfile); + this.groupBoxProfiles.Controls.Add(this.btnSaveProfile); + this.groupBoxProfiles.Controls.Add(this.labelProfile); + this.groupBoxProfiles.Controls.Add(this.cmbProfiles); + this.groupBoxProfiles.Font = new System.Drawing.Font("Segoe UI", 12F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.groupBoxProfiles.Location = new System.Drawing.Point(6, 6); + this.groupBoxProfiles.Name = "groupBoxProfiles"; + this.groupBoxProfiles.Size = new System.Drawing.Size(436, 136); + this.groupBoxProfiles.TabIndex = 0; + this.groupBoxProfiles.TabStop = false; + this.groupBoxProfiles.Text = "Configuration Profiles"; + // + // btnDeleteProfile + // + this.btnDeleteProfile.Font = new System.Drawing.Font("Segoe UI", 10F); + this.btnDeleteProfile.Location = new System.Drawing.Point(297, 92); + this.btnDeleteProfile.Name = "btnDeleteProfile"; + this.btnDeleteProfile.Size = new System.Drawing.Size(121, 30); + this.btnDeleteProfile.TabIndex = 4; + this.btnDeleteProfile.Text = "Delete Profile"; + this.btnDeleteProfile.UseVisualStyleBackColor = true; + this.btnDeleteProfile.Click += new System.EventHandler(this.btnDeleteProfile_Click); + // + // btnLoadProfile + // + this.btnLoadProfile.Font = new System.Drawing.Font("Segoe UI", 10F); + this.btnLoadProfile.Location = new System.Drawing.Point(156, 92); + this.btnLoadProfile.Name = "btnLoadProfile"; + this.btnLoadProfile.Size = new System.Drawing.Size(121, 30); + this.btnLoadProfile.TabIndex = 3; + this.btnLoadProfile.Text = "Load Profile"; + this.btnLoadProfile.UseVisualStyleBackColor = true; + this.btnLoadProfile.Click += new System.EventHandler(this.btnLoadProfile_Click); + // + // btnSaveProfile + // + this.btnSaveProfile.Font = new System.Drawing.Font("Segoe UI", 10F); + this.btnSaveProfile.Location = new System.Drawing.Point(17, 92); + this.btnSaveProfile.Name = "btnSaveProfile"; + this.btnSaveProfile.Size = new System.Drawing.Size(121, 30); + this.btnSaveProfile.TabIndex = 2; + this.btnSaveProfile.Text = "Save As New..."; + this.btnSaveProfile.UseVisualStyleBackColor = true; + this.btnSaveProfile.Click += new System.EventHandler(this.btnSaveProfile_Click); + // + // labelProfile + // + this.labelProfile.AutoSize = true; + this.labelProfile.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.labelProfile.Location = new System.Drawing.Point(14, 44); + this.labelProfile.Name = "labelProfile"; + this.labelProfile.Size = new System.Drawing.Size(89, 19); + this.labelProfile.TabIndex = 1; + this.labelProfile.Text = "Select Profile:"; + // + // cmbProfiles + // + this.cmbProfiles.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList; + this.cmbProfiles.Font = new System.Drawing.Font("Segoe UI", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.cmbProfiles.FormattingEnabled = true; + this.cmbProfiles.Location = new System.Drawing.Point(122, 41); + this.cmbProfiles.Name = "cmbProfiles"; + this.cmbProfiles.Size = new System.Drawing.Size(296, 25); + this.cmbProfiles.TabIndex = 0; + // + // imageListStatus + // + this.imageListStatus.ColorDepth = System.Windows.Forms.ColorDepth.Depth8Bit; + this.imageListStatus.ImageSize = new System.Drawing.Size(16, 16); + this.imageListStatus.TransparentColor = System.Drawing.Color.Transparent; + // + // panelSearch + // + this.panelSearch.Controls.Add(this.searchResultLabel); + this.panelSearch.Controls.Add(this.txtSearchSettings); + this.panelSearch.Controls.Add(this.lblSearchSettings); + this.panelSearch.Location = new System.Drawing.Point(12, 8); + this.panelSearch.Name = "panelSearch"; + this.panelSearch.Size = new System.Drawing.Size(642, 31); + this.panelSearch.TabIndex = 9; + // + // searchResultLabel + // + this.searchResultLabel.AutoSize = true; + this.searchResultLabel.Font = new System.Drawing.Font("Segoe UI", 9F); + this.searchResultLabel.ForeColor = System.Drawing.Color.RoyalBlue; + this.searchResultLabel.Location = new System.Drawing.Point(460, 8); + this.searchResultLabel.Name = "searchResultLabel"; + this.searchResultLabel.Size = new System.Drawing.Size(0, 15); + this.searchResultLabel.TabIndex = 2; + this.searchResultLabel.Visible = false; + // + // txtSearchSettings + // + this.txtSearchSettings.Font = new System.Drawing.Font("Segoe UI", 10F); + this.txtSearchSettings.Location = new System.Drawing.Point(83, 3); + this.txtSearchSettings.Name = "txtSearchSettings"; + this.txtSearchSettings.Size = new System.Drawing.Size(365, 25); + this.txtSearchSettings.TabIndex = 1; + this.txtSearchSettings.TextChanged += new System.EventHandler(this.txtSearchSettings_TextChanged); + // + // lblSearchSettings + // + this.lblSearchSettings.AutoSize = true; + this.lblSearchSettings.Font = new System.Drawing.Font("Segoe UI", 10F); + this.lblSearchSettings.Location = new System.Drawing.Point(3, 6); + this.lblSearchSettings.Name = "lblSearchSettings"; + this.lblSearchSettings.Size = new System.Drawing.Size(73, 19); + this.lblSearchSettings.TabIndex = 0; + this.lblSearchSettings.Text = "Search for:"; + // + // saveNotification + // + this.saveNotification.BackColor = System.Drawing.Color.LightGreen; + this.saveNotification.BorderStyle = System.Windows.Forms.BorderStyle.FixedSingle; + this.saveNotification.Controls.Add(this.lblSaveNotification); + this.saveNotification.Location = new System.Drawing.Point(254, 390); + this.saveNotification.Name = "saveNotification"; + this.saveNotification.Size = new System.Drawing.Size(250, 45); + this.saveNotification.TabIndex = 10; + this.saveNotification.Visible = false; + // + // lblSaveNotification + // + this.lblSaveNotification.Dock = System.Windows.Forms.DockStyle.Fill; + this.lblSaveNotification.Font = new System.Drawing.Font("Segoe UI", 11F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); + this.lblSaveNotification.ForeColor = System.Drawing.Color.DarkGreen; + this.lblSaveNotification.Location = new System.Drawing.Point(0, 0); + this.lblSaveNotification.Name = "lblSaveNotification"; + this.lblSaveNotification.Size = new System.Drawing.Size(248, 43); + this.lblSaveNotification.TabIndex = 0; + this.lblSaveNotification.Text = "✓ Settings saved successfully!"; + this.lblSaveNotification.TextAlign = System.Drawing.ContentAlignment.MiddleCenter; + // + // saveNotificationTimer + // + this.saveNotificationTimer.Interval = 3000; + this.saveNotificationTimer.Tick += new System.EventHandler(this.saveNotificationTimer_Tick); // // ToolConfigForm // this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font; - this.ClientSize = new System.Drawing.Size(450, 571); - this.Controls.Add(this.enablePluginLoggingCheckBox); - this.Controls.Add(this.groupBoxSystemPrompt); - this.Controls.Add(this.cancelButton); + this.ClientSize = new System.Drawing.Size(668, 497); + this.Controls.Add(this.tabControlMain); + this.Controls.Add(this.panelSearch); + this.Controls.Add(this.saveNotification); this.Controls.Add(this.saveButton); - this.Controls.Add(this.groupBoxSettings); - this.Controls.Add(this.groupBoxPlugins); + this.Controls.Add(this.cancelButton); this.MaximizeBox = false; this.MinimizeBox = false; this.Name = "ToolConfigForm"; + this.ShowIcon = false; this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent; this.Text = "Tool Configuration"; + this.Load += new System.EventHandler(this.ToolConfigForm_Load); this.groupBoxPlugins.ResumeLayout(false); this.groupBoxPlugins.PerformLayout(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorWindow)).EndInit(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorMouse)).EndInit(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorKeyboard)).EndInit(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorScreenCapture)).EndInit(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorPowerShell)).EndInit(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorCMD)).EndInit(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorPlaywright)).EndInit(); + ((System.ComponentModel.ISupportInitialize)(this.numRemotePort)).EndInit(); this.groupBoxSettings.ResumeLayout(false); this.groupBoxSettings.PerformLayout(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorAutoInvoke)).EndInit(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorMultiAgent)).EndInit(); ((System.ComponentModel.ISupportInitialize)(this.numTemperature)).EndInit(); - this.groupBoxSystemPrompt.ResumeLayout(false); - this.groupBoxSystemPrompt.PerformLayout(); + this.groupBoxSpeechRecognition.ResumeLayout(false); + this.groupBoxSpeechRecognition.PerformLayout(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorSpeech)).EndInit(); + this.groupBoxVoiceCommands.ResumeLayout(false); + this.groupBoxVoiceCommands.PerformLayout(); + ((System.ComponentModel.ISupportInitialize)(this.indicatorVoiceCmd)).EndInit(); + this.tabControlMain.ResumeLayout(false); + this.tabPlugins.ResumeLayout(false); + this.tabPlugins.PerformLayout(); + this.tabAISettings.ResumeLayout(false); + this.tabVoice.ResumeLayout(false); + this.tabCoordinator.ResumeLayout(false); + this.grpCoordinatorConfig.ResumeLayout(false); + this.grpCoordinatorConfig.PerformLayout(); + this.tabPlanner.ResumeLayout(false); + this.grpPlannerConfig.ResumeLayout(false); + this.grpPlannerConfig.PerformLayout(); + this.tabActioner.ResumeLayout(false); + this.grpActionerConfig.ResumeLayout(false); + this.grpActionerConfig.PerformLayout(); + this.tabAppearance.ResumeLayout(false); + this.groupBoxTheme.ResumeLayout(false); + this.groupBoxTheme.PerformLayout(); + this.tabProfiles.ResumeLayout(false); + this.groupBoxProfiles.ResumeLayout(false); + this.groupBoxProfiles.PerformLayout(); + this.panelSearch.ResumeLayout(false); + this.panelSearch.PerformLayout(); + this.saveNotification.ResumeLayout(false); this.ResumeLayout(false); - this.PerformLayout(); } #endregion private System.Windows.Forms.GroupBox groupBoxPlugins; + private System.Windows.Forms.PictureBox indicatorWindow; + private System.Windows.Forms.PictureBox indicatorMouse; + private System.Windows.Forms.PictureBox indicatorKeyboard; + private System.Windows.Forms.PictureBox indicatorScreenCapture; + private System.Windows.Forms.PictureBox indicatorPowerShell; + private System.Windows.Forms.PictureBox indicatorCMD; + private System.Windows.Forms.PictureBox indicatorPlaywright; private System.Windows.Forms.CheckBox chkMousePlugin; private System.Windows.Forms.CheckBox chkKeyboardPlugin; private System.Windows.Forms.CheckBox chkScreenCapturePlugin; private System.Windows.Forms.CheckBox chkPowerShellPlugin; private System.Windows.Forms.CheckBox chkCMDPlugin; private System.Windows.Forms.CheckBox chkWindowSelectionPlugin; + private System.Windows.Forms.CheckBox chkPlaywrightPlugin; + private System.Windows.Forms.CheckBox enablePluginLoggingCheckBox; + private System.Windows.Forms.CheckBox chkEnableRemoteControl; + private System.Windows.Forms.NumericUpDown numRemotePort; + private System.Windows.Forms.Label lblRemotePort; private System.Windows.Forms.GroupBox groupBoxSettings; + private System.Windows.Forms.PictureBox indicatorAutoInvoke; + private System.Windows.Forms.PictureBox indicatorMultiAgent; private System.Windows.Forms.NumericUpDown numTemperature; private System.Windows.Forms.Label lblTemperature; private System.Windows.Forms.CheckBox chkAutoInvoke; private System.Windows.Forms.CheckBox chkRetainChatHistory; + private System.Windows.Forms.CheckBox chkMultiAgentMode; + private System.Windows.Forms.CheckBox chkDynamicToolPrompts; private System.Windows.Forms.Button saveButton; private System.Windows.Forms.Button cancelButton; - private System.Windows.Forms.GroupBox groupBoxSystemPrompt; - private System.Windows.Forms.TextBox txtSystemPrompt; - private System.Windows.Forms.CheckBox enablePluginLoggingCheckBox; + private System.Windows.Forms.GroupBox groupBoxSpeechRecognition; + private System.Windows.Forms.PictureBox indicatorSpeech; + private System.Windows.Forms.CheckBox chkEnableSpeechRecognition; + private System.Windows.Forms.Label lblSpeechLanguage; + private System.Windows.Forms.ComboBox comboSpeechLanguage; + private System.Windows.Forms.GroupBox groupBoxVoiceCommands; + private System.Windows.Forms.PictureBox indicatorVoiceCmd; + private System.Windows.Forms.CheckBox chkEnableVoiceCommands; + private System.Windows.Forms.Label lblVoiceCommandPhrase; + private System.Windows.Forms.TextBox txtVoiceCommandPhrase; + private System.Windows.Forms.TabControl tabControlMain; + private System.Windows.Forms.TabPage tabPlugins; + private System.Windows.Forms.TabPage tabAISettings; + private System.Windows.Forms.TabPage tabVoice; + private System.Windows.Forms.TabPage tabCoordinator; + private System.Windows.Forms.GroupBox grpCoordinatorConfig; + private System.Windows.Forms.Label lblCoordinatorPrompt; + private System.Windows.Forms.TextBox txtCoordinatorSystemPrompt; + private System.Windows.Forms.CheckBox chkUseCustomCoordinatorConfig; + private System.Windows.Forms.ComboBox comboCoordinatorConfig; + private System.Windows.Forms.Button btnConfigureCoordinator; + private System.Windows.Forms.Button btnResetCoordinator; + private System.Windows.Forms.TabPage tabPlanner; + private System.Windows.Forms.GroupBox grpPlannerConfig; + private System.Windows.Forms.Label lblPlannerPrompt; + private System.Windows.Forms.TextBox txtPlannerSystemPrompt; + private System.Windows.Forms.CheckBox chkUseCustomPlannerConfig; + private System.Windows.Forms.ComboBox comboPlannerConfig; + private System.Windows.Forms.Button btnConfigurePlanner; + private System.Windows.Forms.Button btnResetPlanner; + private System.Windows.Forms.TabPage tabActioner; + private System.Windows.Forms.GroupBox grpActionerConfig; + private System.Windows.Forms.Label lblActionerPrompt; + private System.Windows.Forms.TextBox txtActionerSystemPrompt; + private System.Windows.Forms.ComboBox comboActionerConfig; + private System.Windows.Forms.Button btnConfigureActioner; + private System.Windows.Forms.Button btnResetActioner; + private System.Windows.Forms.CheckBox chkUseCustomExecutorConfig; + private System.Windows.Forms.TabPage tabAppearance; + private System.Windows.Forms.GroupBox groupBoxTheme; + private System.Windows.Forms.Label labelTheme; + private System.Windows.Forms.ComboBox cbTheme; + private System.Windows.Forms.TabPage tabProfiles; + private System.Windows.Forms.GroupBox groupBoxProfiles; + private System.Windows.Forms.Button btnDeleteProfile; + private System.Windows.Forms.Button btnLoadProfile; + private System.Windows.Forms.Button btnSaveProfile; + private System.Windows.Forms.Label labelProfile; + private System.Windows.Forms.ComboBox cmbProfiles; + private System.Windows.Forms.ImageList imageListStatus; + private System.Windows.Forms.Panel panelSearch; + private System.Windows.Forms.Label searchResultLabel; + private System.Windows.Forms.TextBox txtSearchSettings; + private System.Windows.Forms.Label lblSearchSettings; + private System.Windows.Forms.Panel saveNotification; + private System.Windows.Forms.Label lblSaveNotification; + private System.Windows.Forms.Timer saveNotificationTimer; } } \ No newline at end of file diff --git a/FlowVision/ToolConfigForm.cs b/FlowVision/ToolConfigForm.cs index 83032d8..b2a62bc 100644 --- a/FlowVision/ToolConfigForm.cs +++ b/FlowVision/ToolConfigForm.cs @@ -5,10 +5,12 @@ using System.Drawing; using System.IO; using System.Linq; +using System.Speech.Recognition; using System.Text; using System.Threading.Tasks; using System.Windows.Forms; using FlowVision.lib.Classes; +using FlowVision.lib.UI; namespace FlowVision { @@ -17,14 +19,48 @@ public partial class ToolConfigForm : Form private string toolFileName = "toolsconfig"; private ToolConfig _toolConfig; private bool _isNewConfiguration = false; + private ThemeManager _themeManager; + private SettingsProfileManager _profileManager; + private ToolTip _formToolTip; public ToolConfigForm(bool openAsNew = false) { InitializeComponent(); _isNewConfiguration = openAsNew; + // Initialize theme manager + _themeManager = new ThemeManager(); + + // Initialize profile manager + _profileManager = new SettingsProfileManager(); + + // Initialize tooltip + InitializeToolTips(); + + // Populate theme options + cbTheme.Items.Clear(); + cbTheme.Items.Add("Light"); + cbTheme.Items.Add("Dark"); + + // Populate speech recognition language options + PopulateSpeechLanguages(); + + // Populate available API configuration files + PopulateAPIConfigurations(); + + // Populate configuration profiles + PopulateConfigurationProfiles(); + LoadToolConfig(); + // Apply current theme + ApplyTheme(_themeManager.CurrentTheme); + + // Make sure the actioner prompt is always editable by forcing the tab to be enabled + tabActioner.Enabled = true; + txtActionerSystemPrompt.Enabled = true; + lblActionerPrompt.Enabled = true; + // If this is a new configuration being opened automatically, // show a helpful message to the user if (_isNewConfiguration) @@ -38,37 +74,424 @@ public ToolConfigForm(bool openAsNew = false) } } + private void ToolConfigForm_Load(object sender, EventArgs e) + { + // Custom initialization logic can be added here. + // For example: Load tool configuration settings. + } + + private void InitializeToolTips() + { + _formToolTip = new ToolTip + { + AutoPopDelay = 5000, + InitialDelay = 1000, + ReshowDelay = 500, + ShowAlways = true + }; + + // Define tooltips in a dictionary for easier management + var tooltips = new Dictionary + { + { chkCMDPlugin, "Enable command prompt access for the AI assistant" }, + { chkPowerShellPlugin, "Enable PowerShell script execution for the AI assistant" }, + { chkScreenCapturePlugin, "Allow the AI assistant to capture screenshots" }, + { chkKeyboardPlugin, "Allow the AI assistant to control keyboard input" }, + { chkMousePlugin, "Allow the AI assistant to control mouse movements" }, + { chkWindowSelectionPlugin, "Allow the AI assistant to interact with specific windows" }, + { enablePluginLoggingCheckBox, "Record all plugin activities for troubleshooting" }, + { numTemperature, "Controls AI randomness: Higher values = more creative, lower values = more deterministic" }, + { chkMultiAgentMode, "Enable planner and executor agent mode for complex tasks" }, + { chkAutoInvoke, "Allow AI to automatically execute tools without confirmation" }, + { chkRetainChatHistory, "Save conversation history between sessions" }, + { chkEnableSpeechRecognition, "Enable voice input recognition" }, + { comboSpeechLanguage, "Select language for speech recognition" }, + { chkEnableVoiceCommands, "Enable voice command activation" }, + { txtVoiceCommandPhrase, "Phrase to activate voice commands (e.g. 'Hey Assistant')" }, + { cbTheme, "Choose light or dark theme for the interface" }, + { cmbProfiles, "Load or save different configuration profiles" }, + { chkPlaywrightPlugin, "Allow the AI assistant to automate browser interactions" }, + { chkEnableRemoteControl, "Enable remote HTTP control" }, + { numRemotePort, "Port for remote control server" } + }; + + foreach (var kvp in tooltips) + { + _formToolTip.SetToolTip(kvp.Key, kvp.Value); + } + } + + private void PopulateConfigurationProfiles() + { + var profiles = _profileManager.GetAvailableProfiles(); + cmbProfiles.Items.Clear(); + foreach (var profile in profiles) + { + cmbProfiles.Items.Add(profile); + } + + if (cmbProfiles.Items.Count > 0) + { + cmbProfiles.SelectedIndex = 0; + } + } + + private void ApplyTheme(string themeName) + { + if (themeName == "Dark") + { + ApplyDarkTheme(); + } + else + { + ApplyLightTheme(); + } + } + + private void ApplyLightTheme() + { + // Set light theme colors + this.BackColor = Color.White; + this.ForeColor = Color.Black; + + // Apply to all tab pages + foreach (TabPage tab in tabControlMain.TabPages) + { + tab.BackColor = Color.White; + tab.ForeColor = Color.Black; + } + + // Apply to group boxes + foreach (Control control in this.Controls) + { + if (control is GroupBox) + { + control.BackColor = Color.White; + control.ForeColor = Color.Black; + } + } + + // Update status indicator + UpdateStatusIndicators(); + } + + private void ApplyDarkTheme() + { + // Set dark theme colors + this.BackColor = Color.FromArgb(45, 45, 48); + this.ForeColor = Color.White; + + // Apply to all tab pages + foreach (TabPage tab in tabControlMain.TabPages) + { + tab.BackColor = Color.FromArgb(45, 45, 48); + tab.ForeColor = Color.White; + } + + // Apply to group boxes + foreach (Control control in this.Controls) + { + if (control is GroupBox) + { + control.BackColor = Color.FromArgb(45, 45, 48); + control.ForeColor = Color.White; + } + } + + // Update status indicator + UpdateStatusIndicators(); + } + + private void PopulateAPIConfigurations() + { + try + { + string configDir = Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), + "FlowVision", "Config"); + + if (!Directory.Exists(configDir)) + { + Directory.CreateDirectory(configDir); + } + + string[] configFiles = Directory.GetFiles(configDir, "*.json"); + var configNames = configFiles.Select(Path.GetFileNameWithoutExtension).ToList(); + + if (!configNames.Contains("actioner")) configNames.Add("actioner"); + if (!configNames.Contains("planner")) configNames.Add("planner"); + if (!configNames.Contains("executor")) configNames.Add("executor"); + if (!configNames.Contains("coordinator")) configNames.Add("coordinator"); + + comboPlannerConfig.Items.Clear(); + comboActionerConfig.Items.Clear(); + comboCoordinatorConfig.Items.Clear(); + + foreach (var name in configNames) + { + comboPlannerConfig.Items.Add(name); + comboActionerConfig.Items.Add(name); + comboCoordinatorConfig.Items.Add(name); + } + + if (comboPlannerConfig.Items.Count > 0) + { + comboPlannerConfig.SelectedItem = comboPlannerConfig.Items.Contains("planner") ? + "planner" : comboPlannerConfig.Items[0]; + } + + if (comboActionerConfig.Items.Count > 0) + { + comboActionerConfig.SelectedItem = comboActionerConfig.Items.Contains("actioner") ? + "actioner" : comboActionerConfig.Items[0]; + } + + if (comboCoordinatorConfig.Items.Count > 0) + { + comboCoordinatorConfig.SelectedItem = comboCoordinatorConfig.Items.Contains("coordinator") ? + "coordinator" : comboCoordinatorConfig.Items[0]; + } + } + catch (Exception ex) + { + MessageBox.Show($"Error populating API configurations: {ex.Message}", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error); + } + } + + private void PopulateSpeechLanguages() + { + comboSpeechLanguage.Items.Clear(); + + try + { + foreach (var recognizerInfo in SpeechRecognitionEngine.InstalledRecognizers()) + { + comboSpeechLanguage.Items.Add(recognizerInfo.Culture.Name); + } + } + catch (Exception ex) + { + Console.WriteLine($"Error getting speech recognizers: {ex.Message}"); + } + + // If no recognizers found or an error occurred, add default items + if (comboSpeechLanguage.Items.Count == 0) + { + comboSpeechLanguage.Items.AddRange(new object[] { + "en-US", + "en-GB", + "en-AU", + "fr-FR", + "es-ES", + "de-DE" + }); + } + + // Select the first item if none is selected + if (comboSpeechLanguage.SelectedIndex == -1 && comboSpeechLanguage.Items.Count > 0) + { + comboSpeechLanguage.SelectedIndex = 0; + } + } + private void LoadToolConfig() { - // Check if config file exists - string configPath = ToolConfig.ConfigFilePath(toolFileName); - bool configExists = File.Exists(configPath); - - _toolConfig = ToolConfig.LoadConfig(toolFileName); + try + { + string configPath = ToolConfig.ConfigFilePath(toolFileName); + bool configExists = File.Exists(configPath); + + _toolConfig = ToolConfig.LoadConfig(toolFileName) ?? new ToolConfig(); + + // Safely update UI elements + chkCMDPlugin.Checked = _toolConfig.EnableCMDPlugin; + chkPowerShellPlugin.Checked = _toolConfig.EnablePowerShellPlugin; + chkScreenCapturePlugin.Checked = _toolConfig.EnableScreenCapturePlugin; + chkKeyboardPlugin.Checked = _toolConfig.EnableKeyboardPlugin; + chkMousePlugin.Checked = _toolConfig.EnableMousePlugin; + chkWindowSelectionPlugin.Checked = _toolConfig.EnableWindowSelectionPlugin; + enablePluginLoggingCheckBox.Checked = _toolConfig.EnablePluginLogging; + chkPlaywrightPlugin.Checked = _toolConfig.EnablePlaywrightPlugin; + chkEnableRemoteControl.Checked = _toolConfig.EnableRemoteControl; + numRemotePort.Value = _toolConfig.RemoteControlPort; + + numTemperature.Value = (decimal)_toolConfig.Temperature; + chkAutoInvoke.Checked = _toolConfig.AutoInvokeKernelFunctions; + chkRetainChatHistory.Checked = _toolConfig.RetainChatHistory; + chkMultiAgentMode.Checked = _toolConfig.EnableMultiAgentMode; + chkEnableSpeechRecognition.Checked = _toolConfig.EnableSpeechRecognition; + chkEnableVoiceCommands.Checked = _toolConfig.EnableVoiceCommands; + txtVoiceCommandPhrase.Text = _toolConfig.VoiceCommandPhrase; + + // Check if the item exists in the combo box before setting it + if (comboSpeechLanguage.Items.Contains(_toolConfig.SpeechRecognitionLanguage)) + { + comboSpeechLanguage.SelectedItem = _toolConfig.SpeechRecognitionLanguage; + } + else if (comboSpeechLanguage.Items.Count > 0) + { + comboSpeechLanguage.SelectedIndex = 0; + } + + txtPlannerSystemPrompt.Text = _toolConfig.PlannerSystemPrompt; + txtActionerSystemPrompt.Text = _toolConfig.ActionerSystemPrompt; + txtCoordinatorSystemPrompt.Text = _toolConfig.CoordinatorSystemPrompt; + chkUseCustomPlannerConfig.Checked = _toolConfig.UseCustomPlannerConfig; + chkUseCustomExecutorConfig.Checked = _toolConfig.UseCustomActionerConfig; + chkUseCustomCoordinatorConfig.Checked = _toolConfig.UseCustomCoordinatorConfig; + + // Check if the item exists in the combo box before setting it + if (comboPlannerConfig.Items.Contains(_toolConfig.PlannerConfigName)) + { + comboPlannerConfig.SelectedItem = _toolConfig.PlannerConfigName; + } + else if (comboPlannerConfig.Items.Count > 0) + { + comboPlannerConfig.SelectedIndex = 0; + } + + if (comboActionerConfig.Items.Contains(_toolConfig.ActionerConfigName)) + { + comboActionerConfig.SelectedItem = _toolConfig.ActionerConfigName; + } + else if (comboActionerConfig.Items.Count > 0) + { + comboActionerConfig.SelectedIndex = 0; + } + + if (comboCoordinatorConfig.Items.Contains(_toolConfig.CoordinatorConfigName)) + { + comboCoordinatorConfig.SelectedItem = _toolConfig.CoordinatorConfigName; + } + else if (comboCoordinatorConfig.Items.Count > 0) + { + comboCoordinatorConfig.SelectedIndex = 0; + } + + // Set the theme in the combo box + if (cbTheme.Items.Contains(_toolConfig.ThemeName)) + { + cbTheme.SelectedItem = _toolConfig.ThemeName; + } + else + { + cbTheme.SelectedIndex = 0; // Default to the first item + } + + chkDynamicToolPrompts.Checked = _toolConfig.DynamicToolPrompts; + + UpdateMultiAgentUIState(); + UpdatePlannerConfigUIState(); + UpdateActionerConfigUIState(); + UpdateCoordinatorConfigUIState(); + UpdateStatusIndicators(); + + if (_isNewConfiguration && !configExists) + { + _toolConfig.SaveConfig(toolFileName); + } + } + catch (Exception ex) + { + MessageBox.Show($"Error loading configuration: {ex.Message}", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error); + } + } + + private void UpdateStatusIndicators() + { + var statusMappings = new Dictionary + + { + { chkCMDPlugin, indicatorCMD }, + { chkPowerShellPlugin, indicatorPowerShell }, + { chkScreenCapturePlugin, indicatorScreenCapture }, + { chkKeyboardPlugin, indicatorKeyboard }, + { chkMousePlugin, indicatorMouse }, + { chkWindowSelectionPlugin, indicatorWindow }, + { chkMultiAgentMode, indicatorMultiAgent }, + { chkEnableSpeechRecognition, indicatorSpeech }, + { chkEnableVoiceCommands, indicatorVoiceCmd }, + { chkAutoInvoke, indicatorAutoInvoke }, + { chkPlaywrightPlugin, indicatorPlaywright } + }; - chkCMDPlugin.Checked = _toolConfig.EnableCMDPlugin; - chkPowerShellPlugin.Checked = _toolConfig.EnablePowerShellPlugin; - chkScreenCapturePlugin.Checked = _toolConfig.EnableScreenCapturePlugin; - chkKeyboardPlugin.Checked = _toolConfig.EnableKeyboardPlugin; - chkMousePlugin.Checked = _toolConfig.EnableMousePlugin; - chkWindowSelectionPlugin.Checked = _toolConfig.EnableWindowSelectionPlugin; - enablePluginLoggingCheckBox.Checked = _toolConfig.EnablePluginLogging; + foreach (var mapping in statusMappings) + { + UpdateStatusIndicator(mapping.Key, mapping.Value); + } + } - numTemperature.Value = (decimal)_toolConfig.Temperature; - chkAutoInvoke.Checked = _toolConfig.AutoInvokeKernelFunctions; - chkRetainChatHistory.Checked = _toolConfig.RetainChatHistory; - txtSystemPrompt.Text = _toolConfig.SystemPrompt; - - // If this is a new configuration being created, save the default values - if (_isNewConfiguration && !configExists) + private void UpdateStatusIndicator(CheckBox checkBox, PictureBox indicator) + { + if (checkBox.Checked) { - _toolConfig.SaveConfig(toolFileName); + // Use a green checkmark emoji for enabled status + indicator.Image = null; + indicator.BackColor = Color.Transparent; + indicator.Text = "✅"; + indicator.Visible = true; } + else + { + // Use a red X emoji for disabled status + indicator.Image = null; + indicator.BackColor = Color.Transparent; + indicator.Text = "❌"; + indicator.Visible = true; + } + } + + private void UpdateMultiAgentUIState() + { + // Instead of disabling the entire tab, only disable planner-specific controls + // while keeping actioner-related controls enabled + tabPlanner.Enabled = true; + tabActioner.Enabled = true; + tabCoordinator.Enabled = true; + + // Only enable planner-specific controls when multi-agent mode is checked + txtPlannerSystemPrompt.Enabled = chkMultiAgentMode.Checked; + lblPlannerPrompt.Enabled = chkMultiAgentMode.Checked; + chkUseCustomPlannerConfig.Enabled = chkMultiAgentMode.Checked; + comboPlannerConfig.Enabled = chkMultiAgentMode.Checked && chkUseCustomPlannerConfig.Checked; + btnConfigurePlanner.Enabled = chkMultiAgentMode.Checked && chkUseCustomPlannerConfig.Checked; + + // Enable coordinator-specific controls when multi-agent mode is checked + txtCoordinatorSystemPrompt.Enabled = chkMultiAgentMode.Checked; + lblCoordinatorPrompt.Enabled = chkMultiAgentMode.Checked; + chkUseCustomCoordinatorConfig.Enabled = chkMultiAgentMode.Checked; + comboCoordinatorConfig.Enabled = chkMultiAgentMode.Checked && chkUseCustomCoordinatorConfig.Checked; + btnConfigureCoordinator.Enabled = chkMultiAgentMode.Checked && chkUseCustomCoordinatorConfig.Checked; + + // Actioner controls remain enabled regardless of multi-agent mode + + UpdateStatusIndicator(chkMultiAgentMode, indicatorMultiAgent); + } + + private void UpdatePlannerConfigUIState() + { + // Enable or disable the planner config dropdown based on checkbox + comboPlannerConfig.Enabled = chkUseCustomPlannerConfig.Checked; + btnConfigurePlanner.Enabled = chkUseCustomPlannerConfig.Checked; + } + + private void UpdateActionerConfigUIState() + { + // Enable or disable the actioner config dropdown based on checkbox + comboActionerConfig.Enabled = chkUseCustomExecutorConfig.Checked; + btnConfigureActioner.Enabled = chkUseCustomExecutorConfig.Checked; + } + + private void UpdateCoordinatorConfigUIState() + { + // Enable or disable the coordinator config dropdown based on checkbox + comboCoordinatorConfig.Enabled = chkUseCustomCoordinatorConfig.Checked; + btnConfigureCoordinator.Enabled = chkUseCustomCoordinatorConfig.Checked; } private void saveButton_Click(object sender, EventArgs e) { - // Save tool config + // Save existing tool config options _toolConfig.EnableCMDPlugin = chkCMDPlugin.Checked; _toolConfig.EnablePowerShellPlugin = chkPowerShellPlugin.Checked; _toolConfig.EnableScreenCapturePlugin = chkScreenCapturePlugin.Checked; @@ -76,21 +499,447 @@ private void saveButton_Click(object sender, EventArgs e) _toolConfig.EnableMousePlugin = chkMousePlugin.Checked; _toolConfig.EnableWindowSelectionPlugin = chkWindowSelectionPlugin.Checked; _toolConfig.EnablePluginLogging = enablePluginLoggingCheckBox.Checked; + _toolConfig.EnablePlaywrightPlugin = chkPlaywrightPlugin.Checked; + _toolConfig.EnableRemoteControl = chkEnableRemoteControl.Checked; + _toolConfig.RemoteControlPort = (int)numRemotePort.Value; _toolConfig.Temperature = (double)numTemperature.Value; _toolConfig.AutoInvokeKernelFunctions = chkAutoInvoke.Checked; _toolConfig.RetainChatHistory = chkRetainChatHistory.Checked; - _toolConfig.SystemPrompt = txtSystemPrompt.Text; + + // Save multi-agent mode setting + _toolConfig.EnableMultiAgentMode = chkMultiAgentMode.Checked; + + // Save speech recognition settings + _toolConfig.EnableSpeechRecognition = chkEnableSpeechRecognition.Checked; + if (comboSpeechLanguage.SelectedItem != null) + { + _toolConfig.SpeechRecognitionLanguage = comboSpeechLanguage.SelectedItem.ToString(); + } + + // Save voice command settings + _toolConfig.EnableVoiceCommands = chkEnableVoiceCommands.Checked; + _toolConfig.VoiceCommandPhrase = txtVoiceCommandPhrase.Text; + + // Save planner, actioner, and coordinator settings + _toolConfig.PlannerSystemPrompt = txtPlannerSystemPrompt.Text; + _toolConfig.ActionerSystemPrompt = txtActionerSystemPrompt.Text; + _toolConfig.CoordinatorSystemPrompt = txtCoordinatorSystemPrompt.Text; + + // Save custom model configuration options + _toolConfig.UseCustomPlannerConfig = chkUseCustomPlannerConfig.Checked; + _toolConfig.UseCustomActionerConfig = chkUseCustomExecutorConfig.Checked; + _toolConfig.UseCustomCoordinatorConfig = chkUseCustomCoordinatorConfig.Checked; + + if (comboPlannerConfig.SelectedItem != null) + { + _toolConfig.PlannerConfigName = comboPlannerConfig.SelectedItem.ToString(); + } + + if (comboActionerConfig.SelectedItem != null) + { + _toolConfig.ActionerConfigName = comboActionerConfig.SelectedItem.ToString(); + } + + if (comboCoordinatorConfig.SelectedItem != null) + { + _toolConfig.CoordinatorConfigName = comboCoordinatorConfig.SelectedItem.ToString(); + } + + // Save theme configuration + if (cbTheme.SelectedItem != null) + { + _toolConfig.ThemeName = cbTheme.SelectedItem.ToString(); + _themeManager.CurrentTheme = cbTheme.SelectedItem.ToString(); + } + + // Save dynamic tool prompts setting + _toolConfig.DynamicToolPrompts = chkDynamicToolPrompts.Checked; _toolConfig.SaveConfig(toolFileName); - MessageBox.Show("Configuration saved successfully.", "Success", MessageBoxButtons.OK, MessageBoxIcon.Information); - this.Close(); + // Update any active speech recognition services with the new command phrase + try + { + foreach (Form form in Application.OpenForms) + { + if (form is Form1 mainForm) + { + var field = typeof(Form1).GetField("speechRecognition", + System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance); + if (field != null) + { + var speechService = field.GetValue(mainForm) as SpeechRecognitionService; + speechService?.UpdateCommandPhrase(_toolConfig.VoiceCommandPhrase); + } + } + } + } + catch (Exception ex) + { + Console.WriteLine($"Failed to update speech service: {ex.Message}"); + } + + // Show save notification + ShowSaveNotification(); + } + + private void ShowSaveNotification() + { + // Show save notification with fade effect + saveNotification.Visible = true; + saveNotification.BringToFront(); + + // Start the timer to hide the notification + saveNotificationTimer.Start(); + } + + private void saveNotificationTimer_Tick(object sender, EventArgs e) + { + // Hide notification after timer expires + saveNotification.Visible = false; + saveNotificationTimer.Stop(); } private void cancelButton_Click_1(object sender, EventArgs e) { this.Close(); } + + // Event handler for multi-agent mode checkbox + private void chkMultiAgentMode_CheckedChanged(object sender, EventArgs e) + { + // Update UI state when the multi-agent checkbox is toggled + UpdateMultiAgentUIState(); + } + + // Event handlers for custom config checkboxes + private void chkUseCustomPlannerConfig_CheckedChanged(object sender, EventArgs e) + { + UpdatePlannerConfigUIState(); + } + + private void chkUseCustomExecutorConfig_CheckedChanged(object sender, EventArgs e) + { + UpdateActionerConfigUIState(); + } + + private void chkUseCustomCoordinatorConfig_CheckedChanged(object sender, EventArgs e) + { + UpdateCoordinatorConfigUIState(); + } + + // Event handlers for config buttons + private void btnConfigurePlanner_Click(object sender, EventArgs e) + { + if (comboPlannerConfig.SelectedItem != null) + { + string configName = comboPlannerConfig.SelectedItem.ToString(); + OpenAPIConfigForm(configName); + } + } + + private void btnConfigureActioner_Click(object sender, EventArgs e) + { + if (comboActionerConfig.SelectedItem != null) + { + string configName = comboActionerConfig.SelectedItem.ToString(); + OpenAPIConfigForm(configName); + } + } + + private void btnConfigureCoordinator_Click(object sender, EventArgs e) + { + if (comboCoordinatorConfig.SelectedItem != null) + { + string configName = comboCoordinatorConfig.SelectedItem.ToString(); + OpenAPIConfigForm(configName); + } + } + + // Helper method to open API config form + private void OpenAPIConfigForm(string configName) + { + using (var apiConfigForm = new ConfigForm(configName)) + { + apiConfigForm.ShowDialog(); + + // Refresh the API configurations after editing + PopulateAPIConfigurations(); + } + } + + // Event handler for theme selection + private void cbTheme_SelectedIndexChanged(object sender, EventArgs e) + { + if (cbTheme.SelectedItem != null) + { + string selectedTheme = cbTheme.SelectedItem.ToString(); + _themeManager.CurrentTheme = selectedTheme; + ApplyTheme(selectedTheme); + } + } + + // Event handlers for profile management + private void btnSaveProfile_Click(object sender, EventArgs e) + { + using (var inputDialog = new InputDialog("Save Profile", "Enter profile name:")) + { + if (inputDialog.ShowDialog() == DialogResult.OK) + { + string profileName = inputDialog.InputText; + if (!string.IsNullOrEmpty(profileName)) + { + // Save current settings to profile + _profileManager.SaveProfile(profileName, _toolConfig); + + // Refresh profile list + PopulateConfigurationProfiles(); + + // Select the newly created profile + cmbProfiles.SelectedItem = profileName; + + // Show confirmation + MessageBox.Show($"Profile '{profileName}' saved successfully.", + "Profile Saved", MessageBoxButtons.OK, MessageBoxIcon.Information); + } + } + } + } + + private void btnLoadProfile_Click(object sender, EventArgs e) + { + if (cmbProfiles.SelectedItem != null) + { + string profileName = cmbProfiles.SelectedItem.ToString(); + + // Load the selected profile + ToolConfig profileConfig = _profileManager.LoadProfile(profileName); + if (profileConfig != null) + { + _toolConfig = profileConfig; + LoadToolConfig(); // Refresh UI with loaded config + + // Show confirmation + MessageBox.Show($"Profile '{profileName}' loaded successfully.", + "Profile Loaded", MessageBoxButtons.OK, MessageBoxIcon.Information); + } + } + } + + private void btnDeleteProfile_Click(object sender, EventArgs e) + { + if (cmbProfiles.SelectedItem != null) + { + string profileName = cmbProfiles.SelectedItem.ToString(); + + // Confirm deletion + if (MessageBox.Show($"Are you sure you want to delete profile '{profileName}'?", + "Confirm Delete", MessageBoxButtons.YesNo, MessageBoxIcon.Question) == DialogResult.Yes) + { + // Delete the profile + _profileManager.DeleteProfile(profileName); + + // Refresh profile list + PopulateConfigurationProfiles(); + + // Show confirmation + MessageBox.Show($"Profile '{profileName}' deleted.", + "Profile Deleted", MessageBoxButtons.OK, MessageBoxIcon.Information); + } + } + } + + private void txtSearchSettings_TextChanged(object sender, EventArgs e) + { + string searchText = txtSearchSettings.Text.ToLower(); + if (string.IsNullOrWhiteSpace(searchText)) + { + // If search is empty, show all tabs and reset visuals + foreach (TabPage tab in tabControlMain.TabPages) + { + tab.Text = tab.Name.Replace("tab", ""); + } + + searchResultLabel.Visible = false; + return; + } + + // Search through all controls on all tabs + int matchCount = 0; + List matchedTabs = new List(); + + foreach (TabPage tab in tabControlMain.TabPages) + { + bool tabHasMatch = false; + + // Check tab name/title first + if (tab.Text.ToLower().Contains(searchText)) + { + tabHasMatch = true; + matchCount++; + } + + // Check all controls in this tab + foreach (Control control in tab.Controls) + { + // Check if control text/name contains search text + if (control.Text.ToLower().Contains(searchText)) + { + tabHasMatch = true; + matchCount++; + } + + // For group boxes, check their child controls + if (control is GroupBox groupBox) + { + foreach (Control childControl in groupBox.Controls) + { + if (childControl.Text.ToLower().Contains(searchText)) + { + tabHasMatch = true; + matchCount++; + } + } + } + } + + if (tabHasMatch) + { + matchedTabs.Add(tab.Name); + tab.Text = "✓ " + tab.Name.Replace("tab", ""); + } + else + { + tab.Text = tab.Name.Replace("tab", ""); + } + } + + // Display results + searchResultLabel.Text = $"Found {matchCount} matches in {matchedTabs.Count} tabs"; + searchResultLabel.Visible = true; + + // If there's a match and only one tab has matches, switch to that tab + if (matchedTabs.Count == 1) + { + TabPage matchedTab = tabControlMain.TabPages[matchedTabs[0]]; + tabControlMain.SelectedTab = matchedTab; + } + } + + private void chkPluginStatus_CheckedChanged(object sender, EventArgs e) + { + // Update status indicators when any plugin checkbox changes + UpdateStatusIndicators(); + } + + // Reset button event handlers + private void btnResetActioner_Click(object sender, EventArgs e) + { + if (MessageBox.Show( + "Are you sure you want to reset the Actioner system prompt to default?\n\nThis will overwrite your current prompt.", + "Reset to Default", + MessageBoxButtons.YesNo, + MessageBoxIcon.Question) == DialogResult.Yes) + { + txtActionerSystemPrompt.Text = ToolConfig.GetDefaultActionerPrompt(); + MessageBox.Show( + "Actioner system prompt has been reset to default.\n\nDon't forget to click 'Save' to apply the changes!", + "Reset Complete", + MessageBoxButtons.OK, + MessageBoxIcon.Information); + } + } + + private void btnResetPlanner_Click(object sender, EventArgs e) + { + if (MessageBox.Show( + "Are you sure you want to reset the Planner system prompt to default?\n\nThis will overwrite your current prompt.", + "Reset to Default", + MessageBoxButtons.YesNo, + MessageBoxIcon.Question) == DialogResult.Yes) + { + txtPlannerSystemPrompt.Text = ToolConfig.GetDefaultPlannerPrompt(); + MessageBox.Show( + "Planner system prompt has been reset to default.\n\nDon't forget to click 'Save' to apply the changes!", + "Reset Complete", + MessageBoxButtons.OK, + MessageBoxIcon.Information); + } + } + + private void btnResetCoordinator_Click(object sender, EventArgs e) + { + if (MessageBox.Show( + "Are you sure you want to reset the Coordinator system prompt to default?\n\nThis will overwrite your current prompt.", + "Reset to Default", + MessageBoxButtons.YesNo, + MessageBoxIcon.Question) == DialogResult.Yes) + { + txtCoordinatorSystemPrompt.Text = ToolConfig.GetDefaultCoordinatorPrompt(); + MessageBox.Show( + "Coordinator system prompt has been reset to default.\n\nDon't forget to click 'Save' to apply the changes!", + "Reset Complete", + MessageBoxButtons.OK, + MessageBoxIcon.Information); + } + } + } + + // Simple input dialog for profile name entry + public class InputDialog : Form + { + private TextBox textBox; + private Button okButton; + private Button cancelButton; + private Label promptLabel; + + public string InputText + { + get { return textBox.Text; } + } + + public InputDialog(string title, string prompt) + { + this.Text = title; + this.FormBorderStyle = FormBorderStyle.FixedDialog; + this.MaximizeBox = false; + this.MinimizeBox = false; + this.StartPosition = FormStartPosition.CenterParent; + this.Width = 350; + this.Height = 150; + + promptLabel = new Label(); + promptLabel.Text = prompt; + promptLabel.Left = 10; + promptLabel.Top = 10; + promptLabel.Width = 330; + this.Controls.Add(promptLabel); + + textBox = new TextBox(); + textBox.Left = 10; + textBox.Top = 40; + textBox.Width = 330; + this.Controls.Add(textBox); + + okButton = new Button(); + okButton.Text = "OK"; + okButton.Left = 180; + okButton.Top = 80; + okButton.Width = 75; + okButton.DialogResult = DialogResult.OK; + this.Controls.Add(okButton); + + cancelButton = new Button(); + cancelButton.Text = "Cancel"; + cancelButton.Left = 265; + cancelButton.Top = 80; + cancelButton.Width = 75; + cancelButton.DialogResult = DialogResult.Cancel; + this.Controls.Add(cancelButton); + + this.AcceptButton = okButton; + this.CancelButton = cancelButton; + } } } diff --git a/FlowVision/ToolConfigForm.resx b/FlowVision/ToolConfigForm.resx index 1af7de1..a63a8e4 100644 --- a/FlowVision/ToolConfigForm.resx +++ b/FlowVision/ToolConfigForm.resx @@ -117,4 +117,10 @@ System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + 17, 17 + + + 154, 17 + \ No newline at end of file diff --git a/FlowVision/app.config b/FlowVision/app.config new file mode 100644 index 0000000..dd2ae42 --- /dev/null +++ b/FlowVision/app.config @@ -0,0 +1,83 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/FlowVision/lib/Classes/APIConfig.cs b/FlowVision/lib/Classes/APIConfig.cs index 19f49c8..2df0061 100644 --- a/FlowVision/lib/Classes/APIConfig.cs +++ b/FlowVision/lib/Classes/APIConfig.cs @@ -11,6 +11,7 @@ public class APIConfig public string DeploymentName { get; set; } public string EndpointURL { get; set; } public string APIKey { get; set; } + public string ProviderType { get; set; } = "AzureOpenAI"; // "AzureOpenAI", "OpenAI", "LMStudio" private static string ConfigFilePath(string model) { diff --git a/FlowVision/lib/Classes/AgentCoordinator.cs b/FlowVision/lib/Classes/AgentCoordinator.cs new file mode 100644 index 0000000..511fb62 --- /dev/null +++ b/FlowVision/lib/Classes/AgentCoordinator.cs @@ -0,0 +1,88 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace FlowVision.lib.Classes +{ + /// + /// Coordinates and tracks messages between different agents in a multi-agent system. + /// Maintains a record of interactions between different agent roles. + /// + public class AgentCoordinator + { + private List _messageHistory; + + /// + /// A read-only view of the message history for debugging and tracking + /// + public IReadOnlyList MessageHistory => _messageHistory.AsReadOnly(); + + public AgentCoordinator() + { + _messageHistory = new List(); + } + + /// + /// Add a message to the coordination history + /// + /// The agent role that sent the message + /// The agent role that received the message + /// The type/purpose of the message + /// The actual message content + public void AddMessage(AgentRole fromRole, AgentRole toRole, string messageType, string content) + { + var message = new AgentMessage + { + Timestamp = DateTime.Now, + FromRole = fromRole, + ToRole = toRole, + MessageType = messageType, + Content = content + }; + + _messageHistory.Add(message); + } + + /// + /// Get the most recent message of a specific type + /// + /// Type of message to retrieve + /// The most recent message of that type, or null if none exists + public AgentMessage GetLatestMessageOfType(string messageType) + { + return _messageHistory + .Where(m => m.MessageType == messageType) + .OrderByDescending(m => m.Timestamp) + .FirstOrDefault(); + } + + /// + /// Clears the message history + /// + public void Clear() + { + _messageHistory.Clear(); + } + } + + public enum AgentRole + { + User, + Coordinator, + Planner, + Actioner, + All + } + + + public class AgentMessage + { + public DateTime Timestamp { get; set; } + public AgentRole FromRole { get; set; } + public AgentRole ToRole { get; set; } + public string MessageType { get; set; } + public string Content { get; set; } + } +} diff --git a/FlowVision/lib/Classes/ChatExporter.cs b/FlowVision/lib/Classes/ChatExporter.cs new file mode 100644 index 0000000..fb6818d --- /dev/null +++ b/FlowVision/lib/Classes/ChatExporter.cs @@ -0,0 +1,260 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using System.Text.Json; +using System.Windows.Forms; + +namespace FlowVision.lib.Classes +{ + /// + /// Exports chat history with tool calls for debugging and troubleshooting + /// + public static class ChatExporter + { + public class ExportedMessage + { + public string Timestamp { get; set; } + public string Author { get; set; } + public string Content { get; set; } + public List ToolCalls { get; set; } + public string Duration { get; set; } + } + + public class ToolCall + { + public string Plugin { get; set; } + public string Method { get; set; } + public string Parameters { get; set; } + public string Result { get; set; } + public string Timestamp { get; set; } + } + + /// + /// Export chat history to JSON format with tool calls + /// + public static void ExportToJson(List chatHistory, string filePath = null) + { + try + { + if (filePath == null) + { + using (SaveFileDialog saveDialog = new SaveFileDialog()) + { + saveDialog.Filter = "JSON files (*.json)|*.json|All files (*.*)|*.*"; + saveDialog.DefaultExt = "json"; + saveDialog.FileName = $"chat-export-{DateTime.Now:yyyy-MM-dd-HH-mm-ss}.json"; + + if (saveDialog.ShowDialog() == DialogResult.OK) + { + filePath = saveDialog.FileName; + } + else + { + return; + } + } + } + + var exportData = new + { + ExportTime = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"), + MessageCount = chatHistory.Count, + Messages = chatHistory.Select(m => new + { + m.Timestamp, + m.Author, + m.Content + }) + }; + + string json = JsonSerializer.Serialize(exportData, new JsonSerializerOptions + { + WriteIndented = true + }); + + File.WriteAllText(filePath, json); + + MessageBox.Show($"Chat exported successfully to:\n{filePath}", "Export Complete", + MessageBoxButtons.OK, MessageBoxIcon.Information); + } + catch (Exception ex) + { + MessageBox.Show($"Error exporting chat: {ex.Message}", "Export Error", + MessageBoxButtons.OK, MessageBoxIcon.Error); + } + } + + /// + /// Export chat history to Markdown format + /// + public static void ExportToMarkdown(List chatHistory, string filePath = null) + { + try + { + if (filePath == null) + { + using (SaveFileDialog saveDialog = new SaveFileDialog()) + { + saveDialog.Filter = "Markdown files (*.md)|*.md|All files (*.*)|*.*"; + saveDialog.DefaultExt = "md"; + saveDialog.FileName = $"chat-export-{DateTime.Now:yyyy-MM-dd-HH-mm-ss}.md"; + + if (saveDialog.ShowDialog() == DialogResult.OK) + { + filePath = saveDialog.FileName; + } + else + { + return; + } + } + } + + var sb = new StringBuilder(); + sb.AppendLine($"# Chat Export - {DateTime.Now:yyyy-MM-dd HH:mm:ss}"); + sb.AppendLine(); + sb.AppendLine($"**Total Messages:** {chatHistory.Count}"); + sb.AppendLine(); + sb.AppendLine("---"); + sb.AppendLine(); + + foreach (var message in chatHistory) + { + sb.AppendLine($"## {message.Author}"); + sb.AppendLine($"*{message.Timestamp}*"); + sb.AppendLine(); + sb.AppendLine(message.Content); + sb.AppendLine(); + sb.AppendLine("---"); + sb.AppendLine(); + } + + File.WriteAllText(filePath, sb.ToString()); + + MessageBox.Show($"Chat exported successfully to:\n{filePath}", "Export Complete", + MessageBoxButtons.OK, MessageBoxIcon.Information); + } + catch (Exception ex) + { + MessageBox.Show($"Error exporting chat: {ex.Message}", "Export Error", + MessageBoxButtons.OK, MessageBoxIcon.Error); + } + } + + /// + /// Export with detailed tool call logs for debugging + /// + public static void ExportWithToolCalls(List chatHistory, string logFilePath = null) + { + try + { + string mainFilePath = null; + using (SaveFileDialog saveDialog = new SaveFileDialog()) + { + saveDialog.Filter = "Markdown files (*.md)|*.md|All files (*.*)|*.*"; + saveDialog.DefaultExt = "md"; + saveDialog.FileName = $"chat-debug-export-{DateTime.Now:yyyy-MM-dd-HH-mm-ss}.md"; + + if (saveDialog.ShowDialog() == DialogResult.OK) + { + mainFilePath = saveDialog.FileName; + } + else + { + return; + } + } + + var sb = new StringBuilder(); + sb.AppendLine($"# Debugging Chat Export"); + sb.AppendLine($"**Export Time:** {DateTime.Now:yyyy-MM-dd HH:mm:ss}"); + sb.AppendLine($"**Total Messages:** {chatHistory.Count}"); + sb.AppendLine(); + + // Try to include plugin logs if available + string pluginLogPath = Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), + "FlowVision", "plugin_usage.log"); + + if (File.Exists(pluginLogPath) && logFilePath == null) + { + logFilePath = pluginLogPath; + } + + sb.AppendLine("## Chat Messages"); + sb.AppendLine(); + + foreach (var message in chatHistory) + { + sb.AppendLine($"### {message.Author} - {message.Timestamp}"); + sb.AppendLine(); + sb.AppendLine("```"); + sb.AppendLine(message.Content); + sb.AppendLine("```"); + sb.AppendLine(); + } + + // Append tool call logs if available + if (logFilePath != null && File.Exists(logFilePath)) + { + sb.AppendLine("---"); + sb.AppendLine(); + sb.AppendLine("## Plugin Usage Log"); + sb.AppendLine(); + sb.AppendLine("```"); + + // Get last 1000 lines of log + var logLines = File.ReadAllLines(logFilePath); + var recentLines = logLines.Skip(Math.Max(0, logLines.Length - 1000)).ToArray(); + sb.AppendLine(string.Join(Environment.NewLine, recentLines)); + + sb.AppendLine("```"); + } + + File.WriteAllText(mainFilePath, sb.ToString()); + + MessageBox.Show($"Debug export created successfully:\n{mainFilePath}\n\n" + + "This includes chat history and plugin usage logs for troubleshooting.", + "Debug Export Complete", + MessageBoxButtons.OK, MessageBoxIcon.Information); + } + catch (Exception ex) + { + MessageBox.Show($"Error creating debug export: {ex.Message}", "Export Error", + MessageBoxButtons.OK, MessageBoxIcon.Error); + } + } + + /// + /// Quick export to clipboard in readable format + /// + public static void CopyToClipboard(List chatHistory) + { + try + { + var sb = new StringBuilder(); + sb.AppendLine($"=== Chat Export - {DateTime.Now:yyyy-MM-dd HH:mm:ss} ==="); + sb.AppendLine(); + + foreach (var message in chatHistory) + { + sb.AppendLine($"[{message.Timestamp}] {message.Author}:"); + sb.AppendLine(message.Content); + sb.AppendLine(); + } + + Clipboard.SetText(sb.ToString()); + + MessageBox.Show("Chat history copied to clipboard!", "Copied", + MessageBoxButtons.OK, MessageBoxIcon.Information); + } + catch (Exception ex) + { + MessageBox.Show($"Error copying to clipboard: {ex.Message}", "Error", + MessageBoxButtons.OK, MessageBoxIcon.Error); + } + } + } +} diff --git a/FlowVision/lib/Classes/FlorenceCaptionBridge.cs b/FlowVision/lib/Classes/FlorenceCaptionBridge.cs new file mode 100644 index 0000000..e69de29 diff --git a/FlowVision/lib/Classes/LMStudioConfig.cs b/FlowVision/lib/Classes/LMStudioConfig.cs new file mode 100644 index 0000000..c03b76e --- /dev/null +++ b/FlowVision/lib/Classes/LMStudioConfig.cs @@ -0,0 +1,80 @@ +using System; +using System.IO; +using System.Text.Json; + +namespace FlowVision.lib.Classes +{ + public class LMStudioConfig + { + public string EndpointURL { get; set; } = "http://localhost:1234/v1"; + public string ModelName { get; set; } = "local-model"; + public double Temperature { get; set; } = 0.7; + public int MaxTokens { get; set; } = 2048; + public bool Enabled { get; set; } = false; + public string APIKey { get; set; } = "lm-studio"; // OpenAI client requires a key even if local + public int TimeoutSeconds { get; set; } = 120; + + // Track if the config was successfully loaded from disk + [System.Text.Json.Serialization.JsonIgnore] + public bool IsValid { get; set; } = true; + + private static string ConfigFilePath => Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), + "FlowVision", + "lmstudioconfig.json"); + + public static LMStudioConfig LoadConfig() + { + try + { + Directory.CreateDirectory(Path.GetDirectoryName(ConfigFilePath)); + + if (File.Exists(ConfigFilePath)) + { + string jsonContent = File.ReadAllText(ConfigFilePath); + // Basic validation for empty file + if (string.IsNullOrWhiteSpace(jsonContent)) + { + return new LMStudioConfig { IsValid = false }; + } + + var config = JsonSerializer.Deserialize(jsonContent); + if (config != null) + { + config.IsValid = true; + return config; + } + } + } + catch (Exception ex) + { + // Return a config marked as invalid so the UI can warn the user + return new LMStudioConfig + { + Enabled = false, + IsValid = false + }; + } + + // Return default if file doesn't exist + return new LMStudioConfig(); + } + + public void SaveConfig() + { + try + { + Directory.CreateDirectory(Path.GetDirectoryName(ConfigFilePath)); + + var options = new JsonSerializerOptions { WriteIndented = true }; + string jsonContent = JsonSerializer.Serialize(this, options); + File.WriteAllText(ConfigFilePath, jsonContent); + } + catch (Exception ex) + { + Console.WriteLine($"Error saving LM Studio config: {ex.Message}"); + throw; // Re-throw so the UI can show the error + } + } + } +} diff --git a/FlowVision/lib/Classes/MarkdownHelper.cs b/FlowVision/lib/Classes/MarkdownHelper.cs index f619234..615b4a0 100644 --- a/FlowVision/lib/Classes/MarkdownHelper.cs +++ b/FlowVision/lib/Classes/MarkdownHelper.cs @@ -81,7 +81,10 @@ private static void ApplyBoldFormatting(RichTextBox richTextBox, string text) richTextBox.SelectedText = boldText; // Re-select the text and make it bold richTextBox.Select(startIndex, boldText.Length); - richTextBox.SelectionFont = new Font(richTextBox.SelectionFont, FontStyle.Bold); + + // Safely get current font or use default + Font currentFont = richTextBox.SelectionFont ?? new Font("Segoe UI", 10F); + richTextBox.SelectionFont = new Font(currentFont, FontStyle.Bold); } } } @@ -101,7 +104,10 @@ private static void ApplyItalicsFormatting(RichTextBox richTextBox, string text) richTextBox.Select(startIndex, match.Length); richTextBox.SelectedText = italicText; richTextBox.Select(startIndex, italicText.Length); - richTextBox.SelectionFont = new Font(richTextBox.SelectionFont, FontStyle.Italic); + + // Safely get current font or use default + Font currentFont = richTextBox.SelectionFont ?? new Font("Segoe UI", 10F); + richTextBox.SelectionFont = new Font(currentFont, FontStyle.Italic); } } } @@ -121,7 +127,10 @@ private static void ApplyInlineCodeFormatting(RichTextBox richTextBox, string te richTextBox.Select(startIndex, match.Length); richTextBox.SelectedText = codeText; richTextBox.Select(startIndex, codeText.Length); - richTextBox.SelectionFont = new Font("Consolas", richTextBox.SelectionFont.Size); + + // Get current font size, defaulting to 10 if null + float fontSize = richTextBox.SelectionFont?.Size ?? 10F; + richTextBox.SelectionFont = new Font("Consolas", fontSize); richTextBox.SelectionBackColor = Color.LightGray; } } @@ -141,7 +150,10 @@ private static void ApplyCodeBlockFormatting(RichTextBox richTextBox, string tex richTextBox.Select(startIndex, match.Length); richTextBox.SelectedText = codeBlockText; richTextBox.Select(startIndex, codeBlockText.Length); - richTextBox.SelectionFont = new Font("Consolas", richTextBox.SelectionFont.Size); + + // Get current font size, defaulting to 10 if null + float fontSize = richTextBox.SelectionFont?.Size ?? 10F; + richTextBox.SelectionFont = new Font("Consolas", fontSize); richTextBox.SelectionBackColor = Color.LightGray; } } diff --git a/FlowVision/lib/Classes/OcrHelper.cs b/FlowVision/lib/Classes/OcrHelper.cs new file mode 100644 index 0000000..5ae1a6e --- /dev/null +++ b/FlowVision/lib/Classes/OcrHelper.cs @@ -0,0 +1,237 @@ +using System; +using System.Drawing; +using System.Drawing.Imaging; +using System.IO; +using System.Text; +using System.Threading.Tasks; +using Tesseract; + +namespace FlowVision.lib.Classes +{ + /// + /// Helper class for performing OCR on images using Tesseract + /// + public static class OcrHelper + { + private static bool _initialized = false; + private static bool _isAvailable = false; + private static TesseractEngine _engine; + private static readonly object _lock = new object(); + private static string _tessdataPath; + + static OcrHelper() + { + Initialize(); + } + + private static void Initialize() + { + if (_initialized) + return; + + lock (_lock) + { + if (_initialized) + return; + + _initialized = true; + + try + { + // Try to find tessdata directory + string baseDir = AppDomain.CurrentDomain.BaseDirectory; + _tessdataPath = Path.Combine(baseDir, "tessdata"); + + if (!Directory.Exists(_tessdataPath)) + { + PluginLogger.LogError("OcrHelper", "Initialize", + $"tessdata directory not found at: {_tessdataPath}"); + _isAvailable = false; + return; + } + + string engDataFile = Path.Combine(_tessdataPath, "eng.traineddata"); + if (!File.Exists(engDataFile)) + { + PluginLogger.LogError("OcrHelper", "Initialize", + $"English language data not found at: {engDataFile}"); + _isAvailable = false; + return; + } + + // Initialize Tesseract engine + _engine = new TesseractEngine(_tessdataPath, "eng", EngineMode.Default); + + // Configure for better UI text recognition + _engine.SetVariable("tessedit_char_whitelist", + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .-_:@/\\()[]{}!?&+=#$%"); + _engine.SetVariable("preserve_interword_spaces", "1"); + + _isAvailable = true; + PluginLogger.LogInfo("OcrHelper", "Initialize", + "✓ Tesseract OCR initialized successfully. Text extraction is now enabled."); + } + catch (Exception ex) + { + PluginLogger.LogError("OcrHelper", "Initialize", + $"Failed to initialize Tesseract: {ex.Message}"); + _isAvailable = false; + _engine?.Dispose(); + _engine = null; + } + } + } + + /// + /// Search for specific text in an image and return its bounding box. + /// Returns the first match found. + /// + public static async Task FindTextLocationAsync(Bitmap image, string searchText) + { + if (!_isAvailable || _engine == null || string.IsNullOrWhiteSpace(searchText)) + return null; + + return await Task.Run(() => + { + try + { + lock (_lock) + { + using (var pix = PixConverter.ToPix(image)) + using (var page = _engine.Process(pix)) + using (var iter = page.GetIterator()) + { + iter.Begin(); + do + { + // Get text at current iterator level (Word) + string currentText = iter.GetText(PageIteratorLevel.Word)?.Trim(); + + // Simple case-insensitive match + if (!string.IsNullOrWhiteSpace(currentText) && + currentText.Equals(searchText, StringComparison.OrdinalIgnoreCase)) + { + // Found exact match! Get bounding box. + if (iter.TryGetBoundingBox(PageIteratorLevel.Word, out var rect)) + { + return (Rectangle?)new Rectangle(rect.X1, rect.Y1, rect.Width, rect.Height); + } + } + } while (iter.Next(PageIteratorLevel.Word)); + } + } + return (Rectangle?)null; + } + catch (Exception ex) + { + PluginLogger.LogError("OcrHelper", "FindTextLocationAsync", $"OCR search failed: {ex.Message}"); + return (Rectangle?)null; + } + }); + } + + /// + /// Extract text from a bitmap image + /// + public static async Task ExtractTextAsync(Bitmap image) + { + if (!_isAvailable || _engine == null) + return string.Empty; + + return await Task.Run(() => + { + try + { + lock (_lock) + { + using (var pix = PixConverter.ToPix(image)) + using (var page = _engine.Process(pix)) + { + string text = page.GetText()?.Trim(); + return text ?? string.Empty; + } + } + } + catch (Exception ex) + { + PluginLogger.LogError("OcrHelper", "ExtractTextAsync", + $"OCR failed: {ex.Message}"); + return string.Empty; + } + }); + } + + /// + /// Extract text from a specific region of an image + /// + public static async Task ExtractTextFromRegionAsync(Bitmap sourceImage, RectangleF region) + { + if (!_isAvailable || _engine == null) + return string.Empty; + + return await Task.Run(() => + { + try + { + // Validate and adjust region bounds + int x = Math.Max(0, (int)region.X); + int y = Math.Max(0, (int)region.Y); + int width = Math.Min((int)region.Width, sourceImage.Width - x); + int height = Math.Min((int)region.Height, sourceImage.Height - y); + + // Skip very small regions (likely not text) + if (width < 10 || height < 10) + return string.Empty; + + // Crop the region + Rectangle cropRect = new Rectangle(x, y, width, height); + using (Bitmap croppedImage = sourceImage.Clone(cropRect, sourceImage.PixelFormat)) + { + lock (_lock) + { + using (var pix = PixConverter.ToPix(croppedImage)) + using (var page = _engine.Process(pix)) + { + string text = page.GetText()?.Trim(); + + // Only return if we found meaningful text (more than just whitespace) + if (!string.IsNullOrWhiteSpace(text) && text.Length > 1) + { + return text; + } + return string.Empty; + } + } + } + } + catch (Exception ex) + { + PluginLogger.LogError("OcrHelper", "ExtractTextFromRegionAsync", + $"OCR failed for region: {ex.Message}"); + return string.Empty; + } + }); + } + + /// + /// Check if OCR is available + /// + public static bool IsAvailable => _isAvailable; + + /// + /// Cleanup resources + /// + public static void Dispose() + { + lock (_lock) + { + if (_engine != null) + { + _engine.Dispose(); + _engine = null; + _isAvailable = false; + } + } + } + } +} diff --git a/FlowVision/lib/Classes/OmniParserClient.cs b/FlowVision/lib/Classes/OmniParserClient.cs deleted file mode 100644 index a89ceee..0000000 --- a/FlowVision/lib/Classes/OmniParserClient.cs +++ /dev/null @@ -1,107 +0,0 @@ -using FlowVision.lib.Classes; -using FlowVision.Properties; -using Newtonsoft.Json; -using System; -using System.Collections.Generic; -using System.Net.Http; -using System.Text; -using System.Threading.Tasks; - -/// -/// Client for interacting with the OmniParser FastAPI /parse/ endpoint. -/// -public class OmniParserClient -{ - // Set the API endpoint for the FastAPI server. - private const string ApiEndpoint = "/parse/"; - private readonly HttpClient _httpClient; - - public OmniParserClient(HttpClient httpClient) - { - _httpClient = httpClient; - } - - /// - /// Processes a screenshot provided as a Base64-encoded image string and deserializes the JSON response. - /// - /// A Base64-encoded image string. - /// An instance of MyCustomObject representing the API response. - public async Task ProcessScreenshotAsync(string base64Image) - { - // Use the static LoadConfig method to retrieve configuration - OmniParserConfig config = OmniParserConfig.LoadConfig(); - - if (string.IsNullOrWhiteSpace(base64Image)) - throw new ArgumentException("Base64 image data is required", nameof(base64Image)); - - if (string.IsNullOrWhiteSpace(config.ServerURL)) - throw new InvalidOperationException("OmniParser server URL is not configured. Please set it in the OmniParser settings."); - - // Build the JSON payload expected by the API. - var payload = new { base64_image = base64Image }; - string jsonPayload = JsonConvert.SerializeObject(payload); - - // Create the HTTP content. - var content = new StringContent(jsonPayload, Encoding.UTF8, "application/json"); - - // Increase timeout to 10 minutes. - _httpClient.Timeout = TimeSpan.FromMinutes(10); - - // Properly concatenate the server URL with the API endpoint - string fullUrl = config.ServerURL.TrimEnd('/') + ApiEndpoint; - HttpResponseMessage response = await _httpClient.PostAsync(fullUrl, content); - response.EnsureSuccessStatusCode(); - - // Read and deserialize the response content into MyCustomObject. - string responseContent = await response.Content.ReadAsStringAsync(); - OmniparserResponse result = JsonConvert.DeserializeObject(responseContent); - return result; - } -} - -/// -/// Represents the top-level object returned by the API. -/// -public class OmniparserResponse -{ - [JsonProperty("som_image_base64")] - public string SomImageBase64 { get; set; } - [JsonProperty("parsed_content_list")] - public List ParsedContentList { get; set; } - [JsonProperty("latency")] - public double Latency { get; set; } -} - -/// -/// Represents each item in the parsed_content_list. -/// -public class ParsedContent -{ - [JsonProperty("type")] - public string Type { get; set; } - - // Using a double array for the bounding box values. - [JsonProperty("bbox")] - public double[] BBox { get; set; } - - [JsonProperty("interactivity")] - public bool Interactivity { get; set; } - - [JsonProperty("content")] - public string Content { get; set; } - - [JsonProperty("source")] - public string Source { get; set; } -} - -/* - * Future Stuff - * - * int x = Cursor.Position.X; -int y = Cursor.Position.Y; -int size = 10; // Arbitrary size - -System.Drawing.Graphics graphics = CreateGraphics(); -System.Drawing.Rectangle rectangle = new System.Drawing.Rectangle(x - (size / 2), y - (size / 2), size, size); -graphics.DrawRectangle(System.Drawing.Pens.Red, rectangle); - */ \ No newline at end of file diff --git a/FlowVision/lib/Classes/OmniParserConfig.cs b/FlowVision/lib/Classes/OmniParserConfig.cs deleted file mode 100644 index a1a8567..0000000 --- a/FlowVision/lib/Classes/OmniParserConfig.cs +++ /dev/null @@ -1,54 +0,0 @@ -using System; -using System.IO; -using System.Text.Json; - -namespace FlowVision.lib.Classes -{ - public class OmniParserConfig - { - public string ServerURL { get; set; } - - private static string ConfigFilePath => Path.Combine( - Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), - "FlowVision", - "OmniParserConfig.json"); - - public static OmniParserConfig LoadConfig() - { - try - { - Directory.CreateDirectory(Path.GetDirectoryName(ConfigFilePath)); - - if (File.Exists(ConfigFilePath)) - { - string jsonContent = File.ReadAllText(ConfigFilePath); - if (!string.IsNullOrWhiteSpace(jsonContent)) - { - return JsonSerializer.Deserialize(jsonContent) ?? new OmniParserConfig(); - } - } - } - catch (Exception ex) - { - Console.WriteLine($"Error loading OmniParser config: {ex.Message}"); - } - return new OmniParserConfig(); - } - - public void SaveConfig() - { - try - { - Directory.CreateDirectory(Path.GetDirectoryName(ConfigFilePath)); - - var options = new JsonSerializerOptions { WriteIndented = true }; - string jsonContent = JsonSerializer.Serialize(this, options); - File.WriteAllText(ConfigFilePath, jsonContent); - } - catch (Exception ex) - { - Console.WriteLine($"Error saving OmniParser config: {ex.Message}"); - } - } - } -} diff --git a/FlowVision/lib/Classes/ParsedContent.cs b/FlowVision/lib/Classes/ParsedContent.cs new file mode 100644 index 0000000..05fe33b --- /dev/null +++ b/FlowVision/lib/Classes/ParsedContent.cs @@ -0,0 +1,45 @@ +using Newtonsoft.Json; +using System.Collections.Generic; + +namespace FlowVision.lib.Classes +{ + /// + /// Represents each item in the parsed UI content list. + /// Used for screen capture and OmniParser results. + /// + public class ParsedContent + { + [JsonProperty("type")] + public string Type { get; set; } + + /// + /// Bounding box values [x1, y1, x2, y2] in pixels + /// + [JsonProperty("bbox")] + public double[] BBox { get; set; } + + [JsonProperty("interactivity")] + public bool Interactivity { get; set; } + + [JsonProperty("content")] + public string Content { get; set; } + + [JsonProperty("source")] + public string Source { get; set; } + } + + /// + /// Response from OmniParser processing + /// + public class OmniparserResponse + { + [JsonProperty("som_image_base64")] + public string SomImageBase64 { get; set; } + + [JsonProperty("parsed_content_list")] + public List ParsedContentList { get; set; } + + [JsonProperty("latency")] + public double Latency { get; set; } + } +} diff --git a/FlowVision/lib/Classes/PlaywrightSessionManager.cs b/FlowVision/lib/Classes/PlaywrightSessionManager.cs new file mode 100644 index 0000000..6170eaf --- /dev/null +++ b/FlowVision/lib/Classes/PlaywrightSessionManager.cs @@ -0,0 +1,160 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text.Json; + +namespace FlowVision.lib.Classes +{ + /// + /// Manages Playwright browser sessions for persistence between application runs + /// + public static class PlaywrightSessionManager + { + private static readonly string SessionDirectory = Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), + "FlowVision", "PlaywrightSessions"); + + /// + /// Saves the browser storage state for a session + /// + /// Unique identifier for the session + /// The storage state JSON string + public static void SaveStorageState(string sessionId, string storageState) + { + if (string.IsNullOrEmpty(sessionId)) + { + throw new ArgumentException("Session ID cannot be empty", nameof(sessionId)); + } + + if (string.IsNullOrEmpty(storageState)) + { + throw new ArgumentException("Storage state cannot be empty", nameof(storageState)); + } + + // Ensure directory exists + Directory.CreateDirectory(SessionDirectory); + + // Sanitize session ID to be a valid filename + string safeSessionId = SanitizeFileName(sessionId); + + // Save storage state to file + string filePath = GetSessionFilePath(safeSessionId); + File.WriteAllText(filePath, storageState); + } + + /// + /// Gets the storage state for a specific session + /// + /// Unique identifier for the session + /// The storage state JSON string, or null if not found + public static string GetStorageState(string sessionId) + { + if (string.IsNullOrEmpty(sessionId)) + { + throw new ArgumentException("Session ID cannot be empty", nameof(sessionId)); + } + + string safeSessionId = SanitizeFileName(sessionId); + string filePath = GetSessionFilePath(safeSessionId); + + if (!File.Exists(filePath)) + { + return null; + } + + return File.ReadAllText(filePath); + } + + /// + /// Gets a list of all available session IDs + /// + /// List of session IDs + public static List GetAllSessions() + { + var sessions = new List(); + + if (!Directory.Exists(SessionDirectory)) + { + return sessions; + } + + foreach (var file in Directory.GetFiles(SessionDirectory, "*.json")) + { + // Extract session ID from filename (remove extension) + string sessionId = Path.GetFileNameWithoutExtension(file); + sessions.Add(sessionId); + } + + return sessions; + } + + /// + /// Deletes a session + /// + /// Unique identifier for the session + /// True if deleted, false if not found + public static bool DeleteSession(string sessionId) + { + if (string.IsNullOrEmpty(sessionId)) + { + throw new ArgumentException("Session ID cannot be empty", nameof(sessionId)); + } + + string safeSessionId = SanitizeFileName(sessionId); + string filePath = GetSessionFilePath(safeSessionId); + + if (!File.Exists(filePath)) + { + return false; + } + + File.Delete(filePath); + return true; + } + + /// + /// Validates a session ID exists + /// + /// Unique identifier for the session + /// True if the session exists + public static bool SessionExists(string sessionId) + { + if (string.IsNullOrEmpty(sessionId)) + { + return false; + } + + string safeSessionId = SanitizeFileName(sessionId); + string filePath = GetSessionFilePath(safeSessionId); + + return File.Exists(filePath); + } + + /// + /// Gets the file path for a session + /// + /// Unique identifier for the session + /// Full path to the session file + private static string GetSessionFilePath(string sessionId) + { + return Path.Combine(SessionDirectory, $"{sessionId}.json"); + } + + /// + /// Sanitizes a string to be a valid filename + /// + /// Input filename + /// Sanitized filename + private static string SanitizeFileName(string fileName) + { + // Remove invalid characters + char[] invalidChars = Path.GetInvalidFileNameChars(); + foreach (char c in invalidChars) + { + fileName = fileName.Replace(c, '_'); + } + + return fileName; + } + } +} diff --git a/FlowVision/lib/Classes/PluginLogger.cs b/FlowVision/lib/Classes/PluginLogger.cs index c6d831e..20b02a4 100644 --- a/FlowVision/lib/Classes/PluginLogger.cs +++ b/FlowVision/lib/Classes/PluginLogger.cs @@ -1,6 +1,8 @@ -using System; +using System; using System.IO; using System.Text; +using System.Threading; +using System.Threading.Tasks; using System.Windows.Forms; namespace FlowVision.lib.Classes @@ -14,11 +16,15 @@ public class PluginLogger "plugin_usage.log"); private static RichTextBox _outputTextBox; + private static CancellationTokenSource _loadingIndicatorCts; + // Add delegate for direct UI updates + private static Action _addMessageDelegate; // Initialize logger with a text box for displaying messages - public static void Initialize(RichTextBox outputTextBox) + public static void Initialize(RichTextBox outputTextBox, Action addMessageDelegate = null) { _outputTextBox = outputTextBox; + _addMessageDelegate = addMessageDelegate; // Ensure log directory exists string logDir = Path.GetDirectoryName(LogFilePath); @@ -34,6 +40,12 @@ public static void LogPluginUsage(string pluginName, string methodName = null, s if (!ToolConfig.LoadConfig("toolsconfig").EnablePluginLogging) return; + // Track that a tool was called (for hallucination detection) + if (!string.IsNullOrEmpty(methodName)) + { + SimpleAgentActioner.IncrementToolCallCounter(); + } + string timestamp = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff"); string logMessage = $"[{timestamp}] Plugin: {pluginName}"; @@ -47,9 +59,76 @@ public static void LogPluginUsage(string pluginName, string methodName = null, s WriteToLogFile(logMessage); // Update UI if possible - UpdateUI($"Plugin usage: {pluginName}" + + string uiMessage = $"Plugin usage: {pluginName}" + (string.IsNullOrEmpty(methodName) ? "" : $".{methodName}()") + - (string.IsNullOrEmpty(parameters) ? "" : $" with {parameters}")); + (string.IsNullOrEmpty(parameters) ? "" : $" with {parameters}"); + + UpdateUI(uiMessage); + } + + // Show a notification message before executing a task + public static void NotifyTaskStart(string taskName, string description = null) + { + string message = $"Starting task: {taskName}"; + if (!string.IsNullOrEmpty(description)) + message += $" - {description}"; + + UpdateUI(message, isTaskNotification: true); + WriteToLogFile($"TASK START: {message}"); + } + + // Start a loading indicator animation for a running task + public static void StartLoadingIndicator(string taskName) + { + StopLoadingIndicator(); // Ensure any previous indicators are stopped + + _loadingIndicatorCts = new CancellationTokenSource(); + var token = _loadingIndicatorCts.Token; + + // Start the animation in a background task + Task.Run(async () => { + string[] frames = { "⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏" }; + int frameIndex = 0; + + while (!token.IsCancellationRequested) + { + string indicator = frames[frameIndex % frames.Length]; + string loadingMessage = $"{indicator} Processing {taskName}..."; + UpdateLoadingIndicator(loadingMessage); + + await Task.Delay(100, token).ContinueWith(t => { }, TaskContinuationOptions.OnlyOnCanceled); + if (token.IsCancellationRequested) break; + + frameIndex++; + } + }, token); + } + + // Stop the loading indicator animation + public static void StopLoadingIndicator() + { + if (_loadingIndicatorCts != null && !_loadingIndicatorCts.IsCancellationRequested) + { + _loadingIndicatorCts.Cancel(); + _loadingIndicatorCts.Dispose(); + _loadingIndicatorCts = null; + + // Clear the loading indicator from UI + UpdateLoadingIndicator(string.Empty); + } + } + + // Notify that a task has completed + public static void NotifyTaskComplete(string taskName, bool success = true) + { + StopLoadingIndicator(); + + string message = success + ? $"✓ Task completed: {taskName}" + : $"✗ Task failed: {taskName}"; + + UpdateUI(message, isTaskNotification: true); + WriteToLogFile($"TASK {(success ? "COMPLETE" : "FAILED")}: {taskName}"); } // Write a message to the log file @@ -70,19 +149,99 @@ private static void WriteToLogFile(string message) } // Update the UI with the plugin usage information - private static void UpdateUI(string message) + private static void UpdateUI(string message, bool isTaskNotification = false) { + // First try to update via the AddMessage delegate (for direct UI updates) + if (_addMessageDelegate != null) + { + try + { + string prefix = isTaskNotification ? "[Task]" : "[Logger]"; + _addMessageDelegate("System", $"{prefix} {message}", true); + return; // If successful, return early + } + catch (Exception ex) + { + // If the delegate fails, fall back to the text box approach + Console.WriteLine($"Error using message delegate: {ex.Message}"); + } + } + + // Fall back to the original text box approach if (_outputTextBox != null && !_outputTextBox.IsDisposed) { try { if (_outputTextBox.InvokeRequired) { - _outputTextBox.Invoke(new Action(UpdateUI), message); + _outputTextBox.Invoke(new Action(UpdateUI), message, isTaskNotification); return; } - _outputTextBox.AppendText($"[Logger] {message}\n"); + string prefix = isTaskNotification ? "[Task] " : "[Logger] "; + _outputTextBox.AppendText($"{prefix}{message}\n"); + _outputTextBox.ScrollToCaret(); + } + catch (Exception) + { + // Ignore UI update errors to prevent application disruption + } + } + } + + // Update the UI with the current loading indicator frame + private static void UpdateLoadingIndicator(string loadingMessage) + { + // Try to update via the AddMessage delegate first + if (_addMessageDelegate != null && !string.IsNullOrEmpty(loadingMessage)) + { + try + { + _addMessageDelegate("System", $"[Task] {loadingMessage}", true); + return; // If successful, return early + } + catch (Exception) + { + // If the delegate fails, fall back to the text box approach + } + } + + // Fall back to the original text box approach + if (_outputTextBox != null && !_outputTextBox.IsDisposed) + { + try + { + if (_outputTextBox.InvokeRequired) + { + _outputTextBox.Invoke(new Action(UpdateLoadingIndicator), loadingMessage); + return; + } + + // Find the last line that might be a loading indicator + string text = _outputTextBox.Text; + int lastLineStart = text.LastIndexOf('\n'); + + if (lastLineStart >= 0 && text.Length > lastLineStart + 1) + { + string lastLine = text.Substring(lastLineStart + 1); + if (lastLine.Contains("Processing") && lastLine.Contains("...")) + { + // Replace the previous loading indicator line + _outputTextBox.Select(lastLineStart + 1, lastLine.Length); + _outputTextBox.SelectedText = string.IsNullOrEmpty(loadingMessage) ? "" : $"[Task] {loadingMessage}"; + } + else if (!string.IsNullOrEmpty(loadingMessage)) + { + // Add a new loading indicator line + _outputTextBox.AppendText($"[Task] {loadingMessage}\n"); + } + } + else if (!string.IsNullOrEmpty(loadingMessage)) + { + // Add a new loading indicator line if this is the first line + _outputTextBox.AppendText($"[Task] {loadingMessage}\n"); + } + _outputTextBox.ScrollToCaret(); } catch (Exception) @@ -130,5 +289,21 @@ public static void ClearLogs() } } } + + internal static void LogError(string v1, string v2, string v3) + { + //log error + string timestamp = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff"); + string logMessage = $"[{timestamp}] Error: {v1}, {v2}, {v3}"; + WriteToLogFile(logMessage); + } + + internal static void LogInfo(string v1, string v2, string v3) + { + //log info + string timestamp = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff"); + string logMessage = $"[{timestamp}] Info: {v1}, {v2}, {v3}"; + WriteToLogFile(logMessage); + } } } diff --git a/FlowVision/lib/Classes/PluginToolExtractor.cs b/FlowVision/lib/Classes/PluginToolExtractor.cs new file mode 100644 index 0000000..e8e9836 --- /dev/null +++ b/FlowVision/lib/Classes/PluginToolExtractor.cs @@ -0,0 +1,48 @@ +using System; +using System.Collections.Generic; +using System.Reflection; +using Microsoft.Extensions.AI; + +namespace FlowVision.lib.Classes +{ + /// + /// Helper class to extract AITools from plugin instances + /// + public static class PluginToolExtractor + { + /// + /// Extracts all public instance methods from a plugin and converts them to AITools + /// + public static List ExtractTools(object plugin) + { + var tools = new List(); + var pluginType = plugin.GetType(); + + var methods = pluginType.GetMethods(BindingFlags.Public | BindingFlags.Instance); + + foreach (var method in methods) + { + // Skip methods from Object base class and special methods (properties, etc.) + if (method.DeclaringType == typeof(object) || method.IsSpecialName) + continue; + + // Skip methods declared in parent types (only get methods from the plugin itself) + if (method.DeclaringType != pluginType) + continue; + + try + { + var tool = AIFunctionFactory.Create(method, plugin); + tools.Add(tool); + } + catch (Exception) + { + // Skip methods that can't be converted to tools + continue; + } + } + + return tools; + } + } +} diff --git a/FlowVision/lib/Classes/SettingsProfileManager.cs b/FlowVision/lib/Classes/SettingsProfileManager.cs new file mode 100644 index 0000000..92c4e04 --- /dev/null +++ b/FlowVision/lib/Classes/SettingsProfileManager.cs @@ -0,0 +1,106 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text.Json; + +namespace FlowVision.lib.Classes +{ + /// + /// Manages user configuration profiles + /// + public class SettingsProfileManager + { + private readonly string _profilesDirectory; + + public SettingsProfileManager() + { + // Create profiles directory path + _profilesDirectory = Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), + "FlowVision", "Profiles"); + + // Ensure the profiles directory exists + Directory.CreateDirectory(_profilesDirectory); + } + + /// + /// Gets a list of available profile names + /// + public List GetAvailableProfiles() + { + try + { + // Get all profile files and extract names + return Directory.GetFiles(_profilesDirectory, "*.json") + .Select(Path.GetFileNameWithoutExtension) + .ToList(); + } + catch (Exception ex) + { + Console.WriteLine($"Error getting profiles: {ex.Message}"); + return new List(); + } + } + + /// + /// Saves the current configuration to a named profile + /// + public void SaveProfile(string profileName, ToolConfig config) + { + try + { + string profilePath = Path.Combine(_profilesDirectory, $"{profileName}.json"); + var options = new JsonSerializerOptions { WriteIndented = true }; + string jsonContent = JsonSerializer.Serialize(config, options); + File.WriteAllText(profilePath, jsonContent); + } + catch (Exception ex) + { + Console.WriteLine($"Error saving profile: {ex.Message}"); + } + } + + /// + /// Loads a configuration from a named profile + /// + public ToolConfig LoadProfile(string profileName) + { + try + { + string profilePath = Path.Combine(_profilesDirectory, $"{profileName}.json"); + if (File.Exists(profilePath)) + { + string jsonContent = File.ReadAllText(profilePath); + return JsonSerializer.Deserialize(jsonContent) ?? new ToolConfig(); + } + } + catch (Exception ex) + { + Console.WriteLine($"Error loading profile: {ex.Message}"); + } + + // Return default config if profile can't be loaded + return new ToolConfig(); + } + + /// + /// Deletes a named profile + /// + public void DeleteProfile(string profileName) + { + try + { + string profilePath = Path.Combine(_profilesDirectory, $"{profileName}.json"); + if (File.Exists(profilePath)) + { + File.Delete(profilePath); + } + } + catch (Exception ex) + { + Console.WriteLine($"Error deleting profile: {ex.Message}"); + } + } + } +} diff --git a/FlowVision/lib/Classes/SpeechRecognitionService.cs b/FlowVision/lib/Classes/SpeechRecognitionService.cs new file mode 100644 index 0000000..90e9e5c --- /dev/null +++ b/FlowVision/lib/Classes/SpeechRecognitionService.cs @@ -0,0 +1,156 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Speech.Recognition; +using System.Text; +using System.Threading.Tasks; +using System.Windows.Forms; + +namespace FlowVision.lib.Classes +{ + public class SpeechRecognitionService : IDisposable + { + private SpeechRecognitionEngine recognizer; + private bool isListening = false; + + public event EventHandler SpeechRecognized; + public event EventHandler CommandRecognized; // New event for voice commands + + public SpeechRecognitionService() + { + try + { + var toolConfig = ToolConfig.LoadConfig("toolsconfig"); + + // Get the installed recognizers + var recognizerInfo = SpeechRecognitionEngine.InstalledRecognizers() + .FirstOrDefault(r => r.Culture.Name == toolConfig.SpeechRecognitionLanguage) ?? + SpeechRecognitionEngine.InstalledRecognizers().First(); + + // Create a new recognition engine + recognizer = new SpeechRecognitionEngine(recognizerInfo); + + // Configure the recognizer with grammar - using dictation grammar for free speech + recognizer.LoadGrammar(new DictationGrammar()); + + // Add command grammar for send message functionality + if (!string.IsNullOrEmpty(toolConfig.VoiceCommandPhrase)) + { + AddCommandGrammar(toolConfig.VoiceCommandPhrase); + } + + // Set up event handlers + recognizer.SpeechRecognized += Recognizer_SpeechRecognized; + recognizer.RecognizeCompleted += Recognizer_RecognizeCompleted; + + // Set input device to default audio device + recognizer.SetInputToDefaultAudioDevice(); + } + catch (Exception ex) + { + // Rethrow to be handled by the caller + throw new Exception("Failed to initialize speech recognition: " + ex.Message, ex); + } + } + + private void AddCommandGrammar(string commandPhrase) + { + try + { + // Create a Choices object with the command phrase + Choices commands = new Choices(); + commands.Add(commandPhrase); + + // Build a grammar from the choices + GrammarBuilder grammarBuilder = new GrammarBuilder(); + grammarBuilder.Append(commands); + Grammar grammar = new Grammar(grammarBuilder); + grammar.Name = "CommandGrammar"; + + // Add the grammar to the recognizer + recognizer.LoadGrammar(grammar); + } + catch (Exception ex) + { + throw new Exception("Failed to add command grammar: " + ex.Message, ex); + } + } + + private void Recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e) + { + // Make sure confidence is at a reasonable level + if (e.Result.Confidence >= 0.6) + { + // Check if this is a command phrase + var toolConfig = ToolConfig.LoadConfig("toolsconfig"); + if (!string.IsNullOrEmpty(toolConfig.VoiceCommandPhrase) && + string.Equals(e.Result.Text, toolConfig.VoiceCommandPhrase, StringComparison.OrdinalIgnoreCase)) + { + // It's a command, trigger command event + CommandRecognized?.Invoke(this, e.Result.Text); + } + else + { + // Regular speech, trigger normal event + SpeechRecognized?.Invoke(this, e.Result.Text); + } + } + } + + private void Recognizer_RecognizeCompleted(object sender, RecognizeCompletedEventArgs e) + { + // Handle recognition completion + if (e.Error != null) + { + // Notify of error + MessageBox.Show($"Speech recognition error: {e.Error.Message}", "Recognition Error"); + } + + isListening = false; + } + + public void StartListening() + { + if (recognizer != null && !isListening) + { + recognizer.RecognizeAsync(RecognizeMode.Multiple); + isListening = true; + } + } + + public void StopListening() + { + if (recognizer != null && isListening) + { + recognizer.RecognizeAsyncStop(); + isListening = false; + } + } + + public void UpdateCommandPhrase(string phrase) + { + if (recognizer != null && !string.IsNullOrEmpty(phrase)) + { + // Remove old command grammar if it exists + var existingGrammar = recognizer.Grammars.FirstOrDefault(g => g.Name == "CommandGrammar"); + if (existingGrammar != null) + { + recognizer.UnloadGrammar(existingGrammar); + } + + // Add new command grammar + AddCommandGrammar(phrase); + } + } + + public void Dispose() + { + if (recognizer != null) + { + StopListening(); + recognizer.Dispose(); + recognizer = null; + } + } + } +} diff --git a/FlowVision/lib/Classes/TaskNotifier.cs b/FlowVision/lib/Classes/TaskNotifier.cs new file mode 100644 index 0000000..f427399 --- /dev/null +++ b/FlowVision/lib/Classes/TaskNotifier.cs @@ -0,0 +1,86 @@ +using System; +using System.Threading.Tasks; + +namespace FlowVision.lib.Classes +{ + /// + /// Helper class to manage task notifications and loading indicators + /// + public class TaskNotifier : IDisposable + { + private readonly string _taskName; + private readonly string _taskDescription; + private bool _disposed = false; + + /// + /// Creates a new task notification with loading indicator + /// + /// The name of the task being performed + /// Optional description of what the task does + public TaskNotifier(string taskName, string taskDescription = null) + { + _taskName = taskName; + _taskDescription = taskDescription; + + // Display the initial notification + PluginLogger.NotifyTaskStart(_taskName, _taskDescription); + + // Start the loading indicator + PluginLogger.StartLoadingIndicator(_taskName); + } + + /// + /// Completes the task notification and stops the loading indicator + /// + /// Whether the task completed successfully + public void Complete(bool success = true) + { + if (!_disposed) + { + PluginLogger.NotifyTaskComplete(_taskName, success); + PluginLogger.StopLoadingIndicator(); + _disposed = true; + } + } + + + /// + /// Helper method to wrap an async void task with notifications + /// + /// Name of the task + /// Description of the task + /// The task to execute + public static async Task RunWithNotificationAsync(string taskName, string description, Func task) + { + var notifier = new TaskNotifier(taskName, description); + try + { + await task(); + notifier.Complete(true); + } + catch + { + notifier.Complete(false); + throw; + } + finally + { + notifier.Dispose(); + } + } + + /// + /// Dispose pattern implementation + /// + public void Dispose() + { + if (!_disposed) + { + // Make sure we stop the loading indicator if this object is disposed + // without calling Complete + PluginLogger.StopLoadingIndicator(); + _disposed = true; + } + } + } +} diff --git a/FlowVision/lib/Classes/ToolConfig.cs b/FlowVision/lib/Classes/ToolConfig.cs index b8b1568..e58b109 100644 --- a/FlowVision/lib/Classes/ToolConfig.cs +++ b/FlowVision/lib/Classes/ToolConfig.cs @@ -6,69 +6,225 @@ namespace FlowVision.lib.Classes { public class ToolConfig { - public bool EnableCMDPlugin { get; set; } = true; - public bool EnablePowerShellPlugin { get; set; } = true; - public bool EnableScreenCapturePlugin { get; set; } = false; // Changed default to false - public bool EnableKeyboardPlugin { get; set; } = true; - public bool EnableMousePlugin { get; set; } = false; // Changed default to false - public bool EnableWindowSelectionPlugin { get; set; } = true; // Added WindowSelectionPlugin + public bool EnableCMDPlugin { get; set; } = false; + public bool EnablePowerShellPlugin { get; set; } = false; + public bool EnableScreenCapturePlugin { get; set; } = false; + public bool EnableKeyboardPlugin { get; set; } = false; + public bool EnableMousePlugin { get; set; } = false; + public bool EnableWindowSelectionPlugin { get; set; } = false; + public bool EnableClipboardPlugin { get; set; } = false; + public bool EnableFileSystemPlugin { get; set; } = false; + public bool EnableSpeechRecognition { get; set; } = true; + public string SpeechRecognitionLanguage { get; set; } = "en-US"; + public string VoiceCommandPhrase { get; set; } = "send message"; + public bool EnableVoiceCommands { get; set; } = true; public bool EnablePluginLogging { get; set; } = true; - public double Temperature { get; set; } = 0.2; + public double Temperature { get; set; } = 1.0; public bool AutoInvokeKernelFunctions { get; set; } = true; public bool RetainChatHistory { get; set; } = true; - public string SystemPrompt { get; set; } = @"You are an AI Agent that can use tools to help the user. Use your tools to come up with novel ideas to answer the users requests you dont have the answer to. Do not restart the server"; + public bool EnableMultiAgentMode { get; set; } = false; + public string ThemeName { get; set; } = "Light"; + public bool DynamicToolPrompts { get; set; } = true; - public static string ConfigFilePath(string configName) + // New properties for planner and actioner configuration + public string PlannerSystemPrompt { get; set; } = GetDefaultPlannerPrompt(); + + public string ActionerSystemPrompt { get; set; } = GetDefaultActionerPrompt(); + + public string CoordinatorSystemPrompt { get; set; } = GetDefaultCoordinatorPrompt(); + + + // Adding missing properties for custom model configurations + public bool UseCustomPlannerConfig { get; set; } = false; + public bool UseCustomActionerConfig { get; set; } = false; + public bool UseCustomCoordinatorConfig { get; set; } = false; + public string PlannerConfigName { get; set; } = "planner"; + public string ActionerConfigName { get; set; } = "actioner"; + public string CoordinatorConfigName { get; set; } = "coordinator"; + + public bool EnablePlaywrightPlugin { get; set; } = true; // Default to true for browser automation + + // Remote control settings + public bool EnableRemoteControl { get; set; } = false; + public int RemoteControlPort { get; set; } = 8085; + + public static string ConfigFilePath(string filename) { - return Path.Combine( + string configDir = Path.Combine( Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), - "FlowVision", - $"{configName}.json"); + "FlowVision", "Config"); + + if (!Directory.Exists(configDir)) + { + Directory.CreateDirectory(configDir); + } + + return Path.Combine(configDir, $"{filename}.json"); } - public static bool IsConfigured(string configName) + public void SaveConfig(string fileName) { - return File.Exists(ConfigFilePath(configName)); + string configPath = ConfigFilePath(fileName); + string jsonString = JsonSerializer.Serialize(this, new JsonSerializerOptions { WriteIndented = true }); + File.WriteAllText(configPath, jsonString); } - public static ToolConfig LoadConfig(string configName) + public static ToolConfig LoadConfig(string fileName) { - try - { - // Ensure the directory exists. - Directory.CreateDirectory(Path.GetDirectoryName(ConfigFilePath(configName))); - - if (File.Exists(ConfigFilePath(configName))) - { - string jsonContent = File.ReadAllText(ConfigFilePath(configName)); - if (!string.IsNullOrWhiteSpace(jsonContent)) - { - return JsonSerializer.Deserialize(jsonContent) ?? new ToolConfig(); - } - } - } - catch (Exception ex) + string configPath = ConfigFilePath(fileName); + if (!File.Exists(configPath)) { - Console.WriteLine($"Error loading tool config: {ex.Message}"); + var config = new ToolConfig(); + config.SaveConfig(fileName); + return config; } - return new ToolConfig(); + + string jsonString = File.ReadAllText(configPath); + return JsonSerializer.Deserialize(jsonString); } - public void SaveConfig(string configName) + public static bool IsConfigured(string fileName) { - try - { - // Ensure the directory exists. - Directory.CreateDirectory(Path.GetDirectoryName(ConfigFilePath(configName))); + string configPath = ConfigFilePath(fileName); + return File.Exists(configPath); + } - var options = new JsonSerializerOptions { WriteIndented = true }; - string jsonContent = JsonSerializer.Serialize(this, options); - File.WriteAllText(ConfigFilePath(configName), jsonContent); - } - catch (Exception ex) - { - Console.WriteLine($"Error saving tool config: {ex.Message}"); - } + // Static methods to get default prompts + public static string GetDefaultActionerPrompt() + { + return @"You are a browser automation agent. You control a web browser using Playwright tools. + +## AVAILABLE TOOLS + +**Browser Control:** +- LaunchBrowser(browserType, headless) - Start browser (chromium/firefox/webkit) +- CloseBrowser() - Close the browser +- NavigateTo(url) - Go to a URL +- WaitForPageLoad() - Wait for page to finish loading + +**Page Interaction:** +- ClickByText(text, waitForNavigation) - Click a button/link by its visible text (PREFERRED) +- ClickElement(selector, waitForNavigation) - Click by CSS selector +- TypeText(selector, text) - Type into an input field +- GetPageElements() - Get list of buttons, links, and input fields on page +- GetElementText(selector) - Get text content of an element +- TakeScreenshot() - Capture the current page + +**Session Management:** +- SaveSession() - Save login state for later +- ListSessions() - Show saved sessions + +## WORKFLOW + +1. Call GetPageElements() to see what's on the page +2. Use ClickByText() for buttons/links - it's more reliable than CSS selectors +3. Use TypeText() with the selector from GetPageElements() for input fields +4. Call GetPageElements() again to see the new page state + +## EXAMPLE + +To click a Connect button on LinkedIn: +1. GetPageElements() - see available buttons +2. ClickByText(""Connect"") - click the Connect button +3. GetPageElements() - verify result + +## RULES + +- Use ClickByText() for buttons and links - it's more reliable +- Use GetPageElements() to find input field selectors +- If ClickByText fails, try ClickElement with a CSS selector +- Report actual tool results, don't make up responses"; + } + + public static string GetDefaultPlannerPrompt() + { + return @"You are the Planner Agent. You create step-by-step plans for browser automation tasks. + +## YOUR ROLE + +- Break down complex tasks into simple steps +- Each step should be ONE tool call +- Verify results before proceeding to next step + +## STEP FORMAT + +Respond with ONE of: + +**Next step:** +NEXT STEP: [Tool call with parameters] +Example: NEXT STEP: Call NavigateTo(""https://linkedin.com"") +Example: NEXT STEP: Call GetPageElements() to find the login form + +**Task completed:** +TASK COMPLETED: [Summary of what was accomplished] + +**Task failed:** +TASK FAILED: [What went wrong and why] + +## WORKFLOW EXAMPLE + +Task: Log into LinkedIn and like 3 posts + +1. NEXT STEP: Call LaunchBrowser(""chromium"", ""false"") +2. NEXT STEP: Call NavigateTo(""https://linkedin.com"") +3. NEXT STEP: Call GetPageElements() to find login form +4. NEXT STEP: Call TypeText(""#session_key"", ""user@email.com"") +5. NEXT STEP: Call TypeText(""#session_password"", ""password"") +6. NEXT STEP: Call ClickElement(""button[type='submit']"", ""true"") +7. NEXT STEP: Call GetPageElements() to find like buttons +8. NEXT STEP: Call ClickElement(""[aria-label='Like']"") +9. [Repeat for remaining likes] +10. TASK COMPLETED: Liked 3 posts on LinkedIn + +## RULES + +- Only trust actual tool results +- If no tool was called, the action didn't happen +- One step at a time - wait for results before next step +- Use GetPageElements() to discover what's on each page"; + } + + public static string GetDefaultCoordinatorPrompt() + { + return @"You are the Coordinator Agent. You understand user requests and route them appropriately. + +## ROUTING + +**Simple tasks (1-2 steps):** Route directly to Actioner +Examples: ""Open LinkedIn"", ""Take a screenshot"" + +**Complex tasks (3+ steps):** Route through Planner +Examples: ""Log into LinkedIn and like 5 posts"", ""Fill out a form"" + +**Questions/Greetings:** Respond directly +Examples: ""Hi"", ""What can you do?"" + +## COMMUNICATION + +**With users:** Friendly, simple language +**With Planner/Actioner:** Direct, specific instructions + +## EXAMPLES + +User: ""Open LinkedIn"" +→ Route to Actioner: ""Launch browser and navigate to linkedin.com"" + +User: ""Like 5 posts on LinkedIn"" +→ Route to Planner: ""Navigate to LinkedIn, find posts, and like 5 of them"" + +User: ""What can you do?"" +→ Respond: ""I can help you automate web browsing tasks like logging into websites, clicking buttons, filling forms, and more."" + +## AVAILABLE CAPABILITIES + +- Browse websites +- Fill out forms +- Click buttons and links +- Log into websites (can save sessions) +- Take screenshots +- Extract text from pages + +Be helpful and honest about limitations."; } } } diff --git a/FlowVision/lib/Classes/ToolDescriptionGenerator.cs b/FlowVision/lib/Classes/ToolDescriptionGenerator.cs new file mode 100644 index 0000000..8559dc4 --- /dev/null +++ b/FlowVision/lib/Classes/ToolDescriptionGenerator.cs @@ -0,0 +1,195 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +using System.Text; +using FlowVision.lib.Plugins; + +namespace FlowVision.lib.Classes +{ + /// + /// Utility class to generate descriptions of available tools for the system prompt + /// + public class ToolDescriptionGenerator + { + /// + /// Gets a formatted description of all enabled tools based on the current configuration + /// + /// The current tool configuration + /// A formatted string describing available tools + public static string GetToolDescriptions(ToolConfig toolConfig) + { + var availableTools = new List(); + + // Add tools based on configuration + if (toolConfig.EnableCMDPlugin) + availableTools.Add(typeof(CMDPlugin)); + + if (toolConfig.EnablePowerShellPlugin) + availableTools.Add(typeof(PowerShellPlugin)); + + if (toolConfig.EnableScreenCapturePlugin) + availableTools.Add(typeof(ScreenCapturePlugin)); + + if (toolConfig.EnableKeyboardPlugin) + availableTools.Add(typeof(KeyboardPlugin)); + + if (toolConfig.EnableMousePlugin) + availableTools.Add(typeof(MousePlugin)); + + if (toolConfig.EnableWindowSelectionPlugin) + availableTools.Add(typeof(WindowSelectionPlugin)); + + // Add Playwright plugin if enabled + if (toolConfig.EnablePlaywrightPlugin) + availableTools.Add(typeof(PlaywrightPlugin)); + + if (availableTools.Count == 0) + return "No tools are currently available."; + + return FormatToolDescriptions(availableTools); + } + + /// + /// Formats the list of tool types into a readable description for the LLM + /// + /// List of tool type classes + /// A formatted string with tool descriptions + private static string FormatToolDescriptions(List toolTypes) + { + var sb = new StringBuilder(); + sb.AppendLine("You have access to the following tools:"); + sb.AppendLine(); + + foreach (var toolType in toolTypes) + { + // Get the tool name (class name without "Plugin" suffix) + string toolName = toolType.Name; + if (toolName.EndsWith("Plugin")) + toolName = toolName.Substring(0, toolName.Length - 6); + + sb.AppendLine($"## {toolName}"); + + // Get methods that are potential kernel functions (public instance methods) + var methods = toolType.GetMethods(BindingFlags.Public | BindingFlags.Instance) + .Where(m => !m.IsSpecialName) // Filter out property accessors + .Where(m => m.DeclaringType != typeof(object)) // Filter out Object methods + .ToList(); + + foreach (var method in methods) + { + // Get function name + string functionName = method.Name; + + // Get parameters + var parameters = method.GetParameters(); + string paramList = string.Join(", ", parameters.Select(p => $"{p.Name}: {p.ParameterType.Name}")); + + // Get return type + string returnType = method.ReturnType.Name; + + // Format function description + sb.AppendLine($"- `{functionName}({paramList}) -> {returnType}`"); + + // Add XML documentation if available (simplified for now) + sb.AppendLine($" Use this to {GetFriendlyDescription(toolType, functionName)}"); + } + + sb.AppendLine(); + } + + return sb.ToString(); + } + + /// + /// Gets a friendly description for a tool function based on its name and type + /// + private static string GetFriendlyDescription(Type toolType, string methodName) + { + // This could be expanded to use XML documentation comments or attributes + // For now we'll use simple heuristics based on the method name + + if (toolType == typeof(CMDPlugin)) + { + if (methodName == "ExecuteCommand") + return "execute a Windows command prompt (CMD) command"; + } + else if (toolType == typeof(PowerShellPlugin)) + { + if (methodName == "ExecuteScript") + return "execute a PowerShell script or command"; + } + else if (toolType == typeof(ScreenCapturePlugin)) + { + if (methodName == "ForegroundSelect") + return "bring a window to the foreground"; + if (methodName == "ListWindowHandles") + return "list available window handles"; + } + else if (toolType == typeof(KeyboardPlugin)) + { + if (methodName == "SendKeys") + return "simulate keyboard input"; + if (methodName == "TypeText") + return "type text using the keyboard"; + } + else if (toolType == typeof(MousePlugin)) + { + if (methodName == "ClickAtPosition") + return "perform a mouse click at specific coordinates"; + if (methodName == "MoveToPosition") + return "move the mouse cursor to specific coordinates"; + } + else if (toolType == typeof(WindowSelectionPlugin)) + { + if (methodName == "GetOpenWindows") + return "get a list of all open windows"; + if (methodName == "FocusWindow") + return "switch focus to a specific application window"; + } + else if (toolType == typeof(PlaywrightPlugin)) + { + // Playwright plugin method descriptions + switch (methodName) + { + case "IsBrowserActive": + return "check if a browser instance is already running"; + case "GetBrowserStatus": + return "get detailed information about the current browser status"; + case "LaunchBrowser": + return "launch a new browser instance or use the existing one"; + case "NavigateTo": + return "navigate to a specified URL in the browser" + + " with an optional wait strategy"; + case "SetSessionId": + return "set the active session ID for browser operations"; + case "EnableSessionPersistence": + return "enable or disable browser session persistence"; + case "SaveSession": + return "save the current browser session for future use"; + case "TakeScreenshot": + return "take a screenshot of the current browser page"; + case "ClickElement": + return "click on an element identified by a CSS selector"; + case "TypeText": + return "type text into an input field identified by a CSS selector"; + case "ListSessions": + return "list all available saved browser sessions"; + case "DeleteSession": + return "delete a saved browser session"; + case "CloseBrowser": + return "close the active browser and release all resources"; + case "GetPageContent": + return "get the full HTML content of the current page"; + case "GetElementText": + return "get the text content of an element by CSS selector"; + case "ExecuteScript": + return "execute a JavaScript snippet in the current browser page"; + } + } + + // Generic fallback based on method name + return methodName.ToLower().Replace("execute", "run").Replace("get", "retrieve"); + } + } +} diff --git a/FlowVision/lib/Classes/UI/ActivityMonitor.cs b/FlowVision/lib/Classes/UI/ActivityMonitor.cs new file mode 100644 index 0000000..faaa51b --- /dev/null +++ b/FlowVision/lib/Classes/UI/ActivityMonitor.cs @@ -0,0 +1,241 @@ +using System; +using System.Drawing; +using System.Windows.Forms; +using System.Collections.Generic; +using System.Linq; + +namespace FlowVision.lib.Classes.UI +{ + /// + /// Displays real-time activity and system status + /// + public class ActivityMonitor : UserControl + { + private Panel statusPanel; + private Label aiStatusLabel; + private Label ocrStatusLabel; + private Label browserStatusLabel; + private RichTextBox activityLog; + private List activities = new List(); + private const int MaxActivities = 100; + + public ActivityMonitor() + { + InitializeComponent(); + } + + private void InitializeComponent() + { + this.SuspendLayout(); + + // Status panel + statusPanel = new Panel + { + Location = new Point(0, 0), + Size = new Size(this.Width, 80), + Anchor = AnchorStyles.Top | AnchorStyles.Left | AnchorStyles.Right, + BackColor = Color.FromArgb(240, 240, 245), + BorderStyle = BorderStyle.FixedSingle + }; + + // AI Status + aiStatusLabel = CreateStatusLabel("🤖 AI: Ready", new Point(10, 10)); + + // OCR Status + ocrStatusLabel = CreateStatusLabel("👁️ ONNX: Ready", new Point(10, 35)); + + // Browser Status + browserStatusLabel = CreateStatusLabel("🌐 Browser: Inactive", new Point(10, 60)); + + statusPanel.Controls.AddRange(new Control[] { aiStatusLabel, ocrStatusLabel, browserStatusLabel }); + + // Activity log + activityLog = new RichTextBox + { + Location = new Point(0, 85), + Size = new Size(this.Width, this.Height - 85), + Anchor = AnchorStyles.Top | AnchorStyles.Bottom | AnchorStyles.Left | AnchorStyles.Right, + ReadOnly = true, + BackColor = Color.White, + Font = new Font("Consolas", 9F), + BorderStyle = BorderStyle.FixedSingle + }; + + this.Controls.AddRange(new Control[] { statusPanel, activityLog }); + this.Size = new Size(300, 500); + + this.ResumeLayout(false); + } + + private Label CreateStatusLabel(string text, Point location) + { + return new Label + { + Text = text, + Location = location, + AutoSize = true, + Font = new Font("Segoe UI", 9F) + }; + } + + public void UpdateAIStatus(string status, Color color) + { + if (aiStatusLabel.InvokeRequired) + { + aiStatusLabel.Invoke(new Action(() => UpdateAIStatus(status, color))); + return; + } + + aiStatusLabel.Text = $"🤖 AI: {status}"; + aiStatusLabel.ForeColor = color; + } + + public void UpdateONNXStatus(string status, Color color) + { + if (ocrStatusLabel.InvokeRequired) + { + ocrStatusLabel.Invoke(new Action(() => UpdateONNXStatus(status, color))); + return; + } + + ocrStatusLabel.Text = $"👁️ ONNX: {status}"; + ocrStatusLabel.ForeColor = color; + } + + public void UpdateBrowserStatus(string status, Color color) + { + if (browserStatusLabel.InvokeRequired) + { + browserStatusLabel.Invoke(new Action(() => UpdateBrowserStatus(status, color))); + return; + } + + browserStatusLabel.Text = $"🌐 Browser: {status}"; + browserStatusLabel.ForeColor = color; + } + + public void LogActivity(string category, string message, ActivityLevel level = ActivityLevel.Info) + { + if (activityLog.InvokeRequired) + { + activityLog.Invoke(new Action(() => LogActivity(category, message, level))); + return; + } + + var activity = new ActivityEntry + { + Timestamp = DateTime.Now, + Category = category, + Message = message, + Level = level + }; + + activities.Add(activity); + + // Keep only last MaxActivities entries + if (activities.Count > MaxActivities) + { + activities.RemoveAt(0); + } + + // Format and append to log + string icon = GetLevelIcon(level); + Color color = GetLevelColor(level); + string timestamp = activity.Timestamp.ToString("HH:mm:ss"); + + activityLog.SelectionStart = activityLog.TextLength; + activityLog.SelectionLength = 0; + + // Add timestamp + activityLog.SelectionColor = Color.Gray; + activityLog.AppendText($"[{timestamp}] "); + + // Add icon and category + activityLog.SelectionColor = color; + activityLog.AppendText($"{icon} {category}: "); + + // Add message + activityLog.SelectionColor = Color.Black; + activityLog.AppendText($"{message}\n"); + + // Auto-scroll to bottom + activityLog.SelectionStart = activityLog.Text.Length; + activityLog.ScrollToCaret(); + } + + private string GetLevelIcon(ActivityLevel level) + { + return level switch + { + ActivityLevel.Debug => "🔍", + ActivityLevel.Info => "ℹ️", + ActivityLevel.Success => "✅", + ActivityLevel.Warning => "⚠️", + ActivityLevel.Error => "❌", + _ => "•" + }; + } + + private Color GetLevelColor(ActivityLevel level) + { + return level switch + { + ActivityLevel.Debug => Color.Gray, + ActivityLevel.Info => Color.Blue, + ActivityLevel.Success => Color.Green, + ActivityLevel.Warning => Color.Orange, + ActivityLevel.Error => Color.Red, + _ => Color.Black + }; + } + + public void Clear() + { + activities.Clear(); + activityLog.Clear(); + } + + public void ExportLog(string filePath = null) + { + if (filePath == null) + { + using (SaveFileDialog saveDialog = new SaveFileDialog()) + { + saveDialog.Filter = "Text files (*.txt)|*.txt|All files (*.*)|*.*"; + saveDialog.DefaultExt = "txt"; + saveDialog.FileName = $"activity-log-{DateTime.Now:yyyy-MM-dd-HH-mm-ss}.txt"; + + if (saveDialog.ShowDialog() == DialogResult.OK) + { + filePath = saveDialog.FileName; + } + else + { + return; + } + } + } + + System.IO.File.WriteAllText(filePath, activityLog.Text); + MessageBox.Show($"Activity log exported to:\n{filePath}", "Export Complete", + MessageBoxButtons.OK, MessageBoxIcon.Information); + } + } + + public class ActivityEntry + { + public DateTime Timestamp { get; set; } + public string Category { get; set; } + public string Message { get; set; } + public ActivityLevel Level { get; set; } + } + + public enum ActivityLevel + { + Debug, + Info, + Success, + Warning, + Error + } +} diff --git a/FlowVision/lib/Classes/UI/ExecutionVisualizer.cs b/FlowVision/lib/Classes/UI/ExecutionVisualizer.cs new file mode 100644 index 0000000..36cd8e4 --- /dev/null +++ b/FlowVision/lib/Classes/UI/ExecutionVisualizer.cs @@ -0,0 +1,270 @@ +using System; +using System.Drawing; +using System.Windows.Forms; +using System.Collections.Generic; +using System.Linq; + +namespace FlowVision.lib.Classes.UI +{ + /// + /// Visual component showing step-by-step execution progress + /// + public class ExecutionVisualizer : UserControl + { + private FlowLayoutPanel stepsPanel; + private Label titleLabel; + private ProgressBar overallProgress; + private Label statusLabel; + private List steps = new List(); + private int currentStep = 0; + private int totalSteps = 0; + + public ExecutionVisualizer() + { + InitializeComponent(); + } + + private void InitializeComponent() + { + this.SuspendLayout(); + + // Title + titleLabel = new Label + { + Text = "Execution Progress", + Font = new Font("Segoe UI", 12F, FontStyle.Bold), + AutoSize = true, + Location = new Point(10, 10) + }; + + // Status label + statusLabel = new Label + { + Text = "Ready", + Font = new Font("Segoe UI", 9F), + AutoSize = true, + Location = new Point(10, 40), + ForeColor = Color.Gray + }; + + // Overall progress bar + overallProgress = new ProgressBar + { + Location = new Point(10, 65), + Size = new Size(this.Width - 20, 20), + Anchor = AnchorStyles.Top | AnchorStyles.Left | AnchorStyles.Right, + Style = ProgressBarStyle.Continuous + }; + + // Steps panel + stepsPanel = new FlowLayoutPanel + { + Location = new Point(10, 95), + Size = new Size(this.Width - 20, this.Height - 105), + Anchor = AnchorStyles.Top | AnchorStyles.Bottom | AnchorStyles.Left | AnchorStyles.Right, + AutoScroll = true, + FlowDirection = FlowDirection.TopDown, + WrapContents = false + }; + + this.Controls.AddRange(new Control[] { titleLabel, statusLabel, overallProgress, stepsPanel }); + this.Size = new Size(400, 500); + this.BackColor = Color.White; + + this.ResumeLayout(false); + this.PerformLayout(); + } + + public void StartExecution(int totalSteps) + { + this.totalSteps = totalSteps; + this.currentStep = 0; + steps.Clear(); + stepsPanel.Controls.Clear(); + overallProgress.Maximum = totalSteps; + overallProgress.Value = 0; + statusLabel.Text = $"Starting execution: {totalSteps} steps planned"; + statusLabel.ForeColor = Color.Blue; + } + + public void AddStep(string description, string icon = "⏳") + { + var step = new ExecutionStep + { + Description = description, + Status = StepStatus.Pending, + Icon = icon, + Index = steps.Count + 1 + }; + steps.Add(step); + + var stepControl = CreateStepControl(step); + stepsPanel.Controls.Add(stepControl); + + // Auto-scroll to bottom + stepsPanel.ScrollControlIntoView(stepControl); + } + + public void UpdateStep(int stepIndex, StepStatus status, string result = null) + { + if (stepIndex < 0 || stepIndex >= steps.Count) + return; + + var step = steps[stepIndex]; + step.Status = status; + step.Result = result; + + switch (status) + { + case StepStatus.InProgress: + step.Icon = "⚙️"; + currentStep = stepIndex; + statusLabel.Text = $"Step {stepIndex + 1}/{totalSteps}: {step.Description}"; + statusLabel.ForeColor = Color.Blue; + break; + case StepStatus.Completed: + step.Icon = "✅"; + overallProgress.Value = Math.Min(stepIndex + 1, totalSteps); + statusLabel.Text = $"Completed step {stepIndex + 1}/{totalSteps}"; + statusLabel.ForeColor = Color.Green; + break; + case StepStatus.Failed: + step.Icon = "❌"; + statusLabel.Text = $"Step {stepIndex + 1} failed: {result}"; + statusLabel.ForeColor = Color.Red; + break; + case StepStatus.Skipped: + step.Icon = "⏭️"; + break; + } + + // Update the visual control + if (stepIndex < stepsPanel.Controls.Count) + { + var control = stepsPanel.Controls[stepIndex]; + UpdateStepControl(control, step); + } + } + + public void CompleteExecution(bool success) + { + if (success) + { + statusLabel.Text = $"✅ Execution completed successfully ({steps.Count} steps)"; + statusLabel.ForeColor = Color.Green; + overallProgress.Value = overallProgress.Maximum; + } + else + { + statusLabel.Text = $"❌ Execution failed at step {currentStep + 1}"; + statusLabel.ForeColor = Color.Red; + } + } + + private Control CreateStepControl(ExecutionStep step) + { + var panel = new Panel + { + Size = new Size(stepsPanel.Width - 25, 60), + BorderStyle = BorderStyle.FixedSingle, + Padding = new Padding(5), + Margin = new Padding(0, 0, 0, 5), + BackColor = Color.WhiteSmoke, + Tag = step + }; + + var iconLabel = new Label + { + Text = step.Icon, + Font = new Font("Segoe UI", 14F), + Size = new Size(30, 30), + Location = new Point(5, 15), + TextAlign = ContentAlignment.MiddleCenter + }; + + var indexLabel = new Label + { + Text = $"#{step.Index}", + Font = new Font("Segoe UI", 8F, FontStyle.Bold), + Size = new Size(30, 15), + Location = new Point(5, 5), + ForeColor = Color.Gray + }; + + var descLabel = new Label + { + Text = step.Description, + Font = new Font("Segoe UI", 9F), + Size = new Size(panel.Width - 45, 40), + Location = new Point(40, 10), + AutoEllipsis = true + }; + + panel.Controls.AddRange(new Control[] { iconLabel, indexLabel, descLabel }); + return panel; + } + + private void UpdateStepControl(Control control, ExecutionStep step) + { + if (control is Panel panel && panel.Tag == step) + { + // Update icon + var iconLabel = panel.Controls.OfType().FirstOrDefault(l => l.Size.Width == 30 && l.Location.X == 5 && l.Location.Y == 15); + if (iconLabel != null) + { + iconLabel.Text = step.Icon; + } + + // Update background color based on status + switch (step.Status) + { + case StepStatus.InProgress: + panel.BackColor = Color.LightBlue; + break; + case StepStatus.Completed: + panel.BackColor = Color.LightGreen; + break; + case StepStatus.Failed: + panel.BackColor = Color.LightCoral; + break; + case StepStatus.Skipped: + panel.BackColor = Color.LightGray; + break; + } + } + } + + public void Clear() + { + steps.Clear(); + stepsPanel.Controls.Clear(); + overallProgress.Value = 0; + statusLabel.Text = "Ready"; + statusLabel.ForeColor = Color.Gray; + currentStep = 0; + totalSteps = 0; + } + } + + public class ExecutionStep + { + public int Index { get; set; } + public string Description { get; set; } + public StepStatus Status { get; set; } + public string Icon { get; set; } + public string Result { get; set; } + public DateTime StartTime { get; set; } + public DateTime? EndTime { get; set; } + + public TimeSpan? Duration => EndTime.HasValue ? EndTime.Value - StartTime : null; + } + + public enum StepStatus + { + Pending, + InProgress, + Completed, + Failed, + Skipped + } +} diff --git a/FlowVision/lib/Classes/UI/ThemeColors.cs b/FlowVision/lib/Classes/UI/ThemeColors.cs new file mode 100644 index 0000000..b73ee94 --- /dev/null +++ b/FlowVision/lib/Classes/UI/ThemeColors.cs @@ -0,0 +1,41 @@ +using System; +using System.Drawing; + +namespace FlowVision.lib.UI +{ + /// + /// Defines color schemes for light and dark themes + /// + public static class ThemeColors + { + // Light theme colors + public static class Light + { + public static readonly Color Background = Color.White; + public static readonly Color Text = Color.Black; + public static readonly Color ButtonBackground = SystemColors.Control; + public static readonly Color ButtonText = Color.Black; + public static readonly Color ButtonBorder = SystemColors.ControlDark; + public static readonly Color TextBoxBackground = SystemColors.Window; + public static readonly Color TextBoxText = Color.Black; + public static readonly Color TextBoxBorder = SystemColors.ControlDark; + public static readonly Color TabBackground = Color.White; + public static readonly Color TabText = Color.Black; + } + + // Dark theme colors + public static class Dark + { + public static readonly Color Background = Color.FromArgb(45, 45, 48); + public static readonly Color Text = Color.White; + public static readonly Color ButtonBackground = Color.FromArgb(60, 60, 65); + public static readonly Color ButtonText = Color.White; + public static readonly Color ButtonBorder = Color.FromArgb(80, 80, 85); + public static readonly Color TextBoxBackground = Color.FromArgb(30, 30, 35); + public static readonly Color TextBoxText = Color.White; + public static readonly Color TextBoxBorder = Color.FromArgb(80, 80, 85); + public static readonly Color TabBackground = Color.FromArgb(45, 45, 48); + public static readonly Color TabText = Color.White; + } + } +} diff --git a/FlowVision/lib/Classes/UI/ThemeManager.cs b/FlowVision/lib/Classes/UI/ThemeManager.cs new file mode 100644 index 0000000..8cc907c --- /dev/null +++ b/FlowVision/lib/Classes/UI/ThemeManager.cs @@ -0,0 +1,195 @@ +using System; +using System.Drawing; +using System.IO; +using System.Text.Json; +using System.Windows.Forms; + +namespace FlowVision.lib.UI +{ + /// + /// Manages application themes and theme-related settings + /// + public class ThemeManager + { + private const string DefaultTheme = "Light"; + private string _currentTheme; + private readonly string _themePath; + + public ThemeManager() + { + // Create theme settings path + _themePath = Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), + "FlowVision", "Settings", "theme.json"); + + // Create directory if it doesn't exist + Directory.CreateDirectory(Path.GetDirectoryName(_themePath)); + + // Load theme from file or use default + _currentTheme = LoadThemeFromFile(); + } + + /// + /// Gets or sets the current theme + /// + public string CurrentTheme + { + get => _currentTheme; + set + { + _currentTheme = value; + SaveThemeToFile(_currentTheme); + } + } + + /// + /// Applies theme colors to a button control + /// + /// Button to apply theme to + public void ApplyThemeToButton(Button button) + { + if (_currentTheme == "Dark") + { + button.BackColor = ThemeColors.Dark.ButtonBackground; + button.ForeColor = ThemeColors.Dark.ButtonText; + button.FlatStyle = FlatStyle.Flat; + button.FlatAppearance.BorderColor = ThemeColors.Dark.ButtonBorder; + } + else + { + button.BackColor = ThemeColors.Light.ButtonBackground; + button.ForeColor = ThemeColors.Light.ButtonText; + button.FlatStyle = FlatStyle.Standard; + } + } + + /// + /// Applies theme colors to a textbox control + /// + /// TextBox to apply theme to + public void ApplyThemeToTextBox(TextBoxBase textBox) + { + if (_currentTheme == "Dark") + { + textBox.BackColor = ThemeColors.Dark.TextBoxBackground; + textBox.ForeColor = ThemeColors.Dark.TextBoxText; + textBox.BorderStyle = BorderStyle.FixedSingle; + } + else + { + textBox.BackColor = ThemeColors.Light.TextBoxBackground; + textBox.ForeColor = ThemeColors.Light.TextBoxText; + textBox.BorderStyle = BorderStyle.Fixed3D; + } + } + + /// + /// Applies theme colors to all controls in a container + /// + /// Container with controls to theme + public void ApplyThemeToControls(Control container) + { + // Set container colors + if (_currentTheme == "Dark") + { + container.BackColor = ThemeColors.Dark.Background; + container.ForeColor = ThemeColors.Dark.Text; + } + else + { + container.BackColor = ThemeColors.Light.Background; + container.ForeColor = ThemeColors.Light.Text; + } + + // Process all child controls recursively + foreach (Control control in container.Controls) + { + if (control is Button button) + { + ApplyThemeToButton(button); + } + else if (control is TextBoxBase textBox) + { + ApplyThemeToTextBox(textBox); + } + else if (control is TabPage tabPage) + { + // Apply theme to tab page + if (_currentTheme == "Dark") + { + tabPage.BackColor = ThemeColors.Dark.TabBackground; + tabPage.ForeColor = ThemeColors.Dark.TabText; + } + else + { + tabPage.BackColor = ThemeColors.Light.TabBackground; + tabPage.ForeColor = ThemeColors.Light.TabText; + } + + // Process tab page controls + ApplyThemeToControls(tabPage); + } + else if (control is GroupBox) + { + // Apply theme to group box + if (_currentTheme == "Dark") + { + control.BackColor = ThemeColors.Dark.Background; + control.ForeColor = ThemeColors.Dark.Text; + } + else + { + control.BackColor = ThemeColors.Light.Background; + control.ForeColor = ThemeColors.Light.Text; + } + + // Process group box controls + ApplyThemeToControls(control); + } + else if (control.Controls.Count > 0) + { + // Recursively apply theme to container controls + ApplyThemeToControls(control); + } + } + } + + private string LoadThemeFromFile() + { + try + { + if (File.Exists(_themePath)) + { + string jsonContent = File.ReadAllText(_themePath); + var themeSetting = JsonSerializer.Deserialize(jsonContent); + return themeSetting?.Name ?? DefaultTheme; + } + } + catch (Exception ex) + { + Console.WriteLine($"Error loading theme: {ex.Message}"); + } + + return DefaultTheme; + } + + private void SaveThemeToFile(string themeName) + { + try + { + var themeSetting = new ThemeSetting { Name = themeName }; + string jsonContent = JsonSerializer.Serialize(themeSetting, new JsonSerializerOptions { WriteIndented = true }); + File.WriteAllText(_themePath, jsonContent); + } + catch (Exception ex) + { + Console.WriteLine($"Error saving theme: {ex.Message}"); + } + } + + private class ThemeSetting + { + public string Name { get; set; } + } + } +} diff --git a/FlowVision/lib/Classes/ai/AIClientFactory.cs b/FlowVision/lib/Classes/ai/AIClientFactory.cs new file mode 100644 index 0000000..45489e0 --- /dev/null +++ b/FlowVision/lib/Classes/ai/AIClientFactory.cs @@ -0,0 +1,52 @@ +using System; +using Microsoft.Extensions.AI; +using Azure.AI.OpenAI; +using Azure; +using OpenAI; + +namespace FlowVision.lib.Classes.ai +{ + public static class AIClientFactory + { + public static IChatClient CreateClient(APIConfig config) + { + if (config == null) + throw new ArgumentNullException(nameof(config)); + + if (string.IsNullOrWhiteSpace(config.EndpointURL)) + throw new ArgumentException("EndpointURL is required but was null or empty", nameof(config)); + + if (string.IsNullOrWhiteSpace(config.APIKey)) + throw new ArgumentException("APIKey is required but was null or empty", nameof(config)); + + switch (config.ProviderType?.ToLowerInvariant()) + { + case "gemini": + // Use standard OpenAI client pointing to Google's endpoint + // Endpoint format: https://generativelanguage.googleapis.com/v1beta/openai/ + var geminiClient = new OpenAIClient( + new System.ClientModel.ApiKeyCredential(config.APIKey), + new OpenAIClientOptions { Endpoint = new Uri(config.EndpointURL) } + ); + return geminiClient.GetChatClient(config.DeploymentName).AsIChatClient(); + + case "lmstudio": + case "openai": // Generic OpenAI compatible + var openAIClient = new OpenAIClient( + new System.ClientModel.ApiKeyCredential(config.APIKey), + new OpenAIClientOptions { Endpoint = new Uri(config.EndpointURL) } + ); + return openAIClient.GetChatClient(config.DeploymentName).AsIChatClient(); + + case "azureopenai": + default: + // Default to Azure OpenAI + var azureClient = new AzureOpenAIClient( + new Uri(config.EndpointURL), + new AzureKeyCredential(config.APIKey) + ); + return azureClient.GetChatClient(config.DeploymentName).AsIChatClient(); + } + } + } +} diff --git a/FlowVision/lib/Classes/ai/Actioner.cs b/FlowVision/lib/Classes/ai/Actioner.cs index 8155570..01efff4 100644 --- a/FlowVision/lib/Classes/ai/Actioner.cs +++ b/FlowVision/lib/Classes/ai/Actioner.cs @@ -1,38 +1,57 @@ -using System; +using System; using System.Collections.Generic; using System.Linq; +using System.Reflection; +using System.Text; +using System.Threading.Tasks; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; using System.Text; using System.Threading.Tasks; using System.Windows.Forms; using FlowVision.lib.Plugins; -using Microsoft.Extensions.DependencyInjection; -using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.ChatCompletion; -using Microsoft.SemanticKernel.Connectors.OpenAI; +using Microsoft.Extensions.AI; +using FlowVision.lib.Classes.ai; +using FlowVision; // Required for Form1 namespace FlowVision.lib.Classes { public class Actioner { - private IChatCompletionService actionerChat; - private ChatHistory actionerHistory; - private Kernel actionerKernel; - private RichTextBox outputTextBox; + private IChatClient actionerChat; + private List actionerHistory; private const string ACTIONER_CONFIG = "actioner"; - private const string TOOL_CONFIG = "toolsconfig"; // Added constant for tool config + private const string TOOL_CONFIG = "toolsconfig"; + + private SimpleAgentActioner simpleAgentActioner; + private LMStudioActioner lmStudioActioner; + private bool useMultiAgentMode = false; public object ToolCallBehavior { get; private set; } // Update the Actioner constructor to support both the delegate and the RichTextBox approaches public Actioner(Form1.PluginOutputHandler outputHandler) { - actionerHistory = new ChatHistory(); + actionerHistory = new List(); // Create a RichTextBox that isn't displayed but used for logging var hiddenTextBox = new RichTextBox { Visible = false }; // Initialize the plugin logger with the hidden text box - PluginLogger.Initialize(hiddenTextBox); + // Pass the direct UI update delegate that takes the message and adds it to the chat UI + if (Application.OpenForms.Count > 0 && Application.OpenForms[0] is Form1 mainForm) + { + // Get reference to the AddMessage method on the Form1 instance + Action addMessageAction = mainForm.AddMessage; + PluginLogger.Initialize(hiddenTextBox, addMessageAction); + } + else + { + // Fall back to just using the hidden text box + PluginLogger.Initialize(hiddenTextBox); + } // Override the UpdateUI method to use our output handler if (outputHandler != null) @@ -48,132 +67,199 @@ public Actioner(Form1.PluginOutputHandler outputHandler) }; } - this.outputTextBox = hiddenTextBox; + // Initialize the simple agent actioner and LM Studio actioner with the same output handler + simpleAgentActioner = new SimpleAgentActioner(outputHandler); + lmStudioActioner = new LMStudioActioner(outputHandler); + + // Start remote control server if enabled + ToolConfig toolConfigInit = ToolConfig.LoadConfig(TOOL_CONFIG); + if (toolConfigInit.EnableRemoteControl) + { + RemoteControlPlugin.SetCommandHandler(async cmd => await ExecuteAction(cmd)); + RemoteControlPlugin.StartServer(toolConfigInit.RemoteControlPort); + } } - public async Task ExecuteAction(string actionPrompt) + // Add method to toggle multi-agent mode + public void SetMultiAgentMode(bool enabled) { + useMultiAgentMode = enabled; + } - ToolConfig toolConfig = ToolConfig.LoadConfig(TOOL_CONFIG); - // Add system message to actioner history - actionerHistory.AddSystemMessage(toolConfig.SystemPrompt); - - // Add action prompt to actioner history - actionerHistory.AddUserMessage(actionPrompt); - - // Load actioner model config - APIConfig config = APIConfig.LoadConfig(ACTIONER_CONFIG); - // Changed to use the same config file as ToolConfigForm - - if (string.IsNullOrWhiteSpace(config.DeploymentName) || - string.IsNullOrWhiteSpace(config.EndpointURL) || - string.IsNullOrWhiteSpace(config.APIKey)) - { - outputTextBox.AppendText("Error: Actioner model not configured\n\n"); - return "Error: Actioner model not configured"; - } - - // Setup the kernel for actioner with plugins - var builder = Kernel.CreateBuilder(); - builder.AddAzureOpenAIChatCompletion( - config.DeploymentName, - config.EndpointURL, - config.APIKey); - - // Configure OpenAI settings based on toolConfig - var settings = new OpenAIPromptExecutionSettings - { - Temperature = toolConfig.Temperature, - ToolCallBehavior = toolConfig.AutoInvokeKernelFunctions - ? Microsoft.SemanticKernel.Connectors.OpenAI.ToolCallBehavior.AutoInvokeKernelFunctions - : Microsoft.SemanticKernel.Connectors.OpenAI.ToolCallBehavior.EnableKernelFunctions - }; - - // Log which plugins are being enabled - outputTextBox.AppendText("Enabling the following plugins:\n"); - - // Add plugins dynamically based on tool configuration - if (toolConfig.EnableCMDPlugin) - { - builder.Plugins.AddFromType(); - outputTextBox.AppendText("- CMD Plugin\n"); - } - - if (toolConfig.EnablePowerShellPlugin) - { - builder.Plugins.AddFromType(); - outputTextBox.AppendText("- PowerShell Plugin\n"); - } - - if (toolConfig.EnableScreenCapturePlugin) - { - builder.Plugins.AddFromType(); - outputTextBox.AppendText("- Screen Capture Plugin\n"); - } - - if (toolConfig.EnableKeyboardPlugin) - { - builder.Plugins.AddFromType(); - outputTextBox.AppendText("- Keyboard Plugin\n"); - } - - if (toolConfig.EnableMousePlugin) + public async Task ExecuteAction(string actionPrompt) + { + // Check if LM Studio is enabled first + var lmStudioConfig = LMStudioConfig.LoadConfig(); + if (lmStudioConfig.Enabled) { - builder.Plugins.AddFromType(); - outputTextBox.AppendText("- Mouse Plugin\n"); + return await lmStudioActioner.ExecuteAction(actionPrompt); } - - if (toolConfig.EnableWindowSelectionPlugin) // Add this conditional + + // If multi-agent mode is enabled, use the simple agent actioner + if (useMultiAgentMode) { - builder.Plugins.AddFromType(); - outputTextBox.AppendText("- Window Selection Plugin\n"); + return await simpleAgentActioner.ExecuteAction(actionPrompt); } - outputTextBox.AppendText("\n"); + // Otherwise use the original Azure implementation + ToolConfig toolConfig = ToolConfig.LoadConfig(TOOL_CONFIG); - builder.Services.AddSingleton(outputTextBox); + // Generate tool descriptions if dynamic prompts are enabled + string toolDescriptions = toolConfig.DynamicToolPrompts + ? "\n\n" + ToolDescriptionGenerator.GetToolDescriptions(toolConfig) + : string.Empty; - actionerKernel = builder.Build(); - actionerChat = actionerKernel.GetRequiredService(); + // Notify that we're starting the action execution + PluginLogger.NotifyTaskStart("Action Execution", "Processing your request"); + PluginLogger.StartLoadingIndicator("request"); - // Process the response - var responseBuilder = new StringBuilder(); - // Uncomment for Open AI - var responseStream = actionerChat.GetStreamingChatMessageContentsAsync(actionerHistory, settings, actionerKernel); - var enumerator = responseStream.GetAsyncEnumerator(); try { - while (await enumerator.MoveNextAsync()) + // Add system message to actioner history + actionerHistory.Add(new ChatMessage(ChatRole.System, toolConfig.ActionerSystemPrompt + toolDescriptions)); + + // Add action prompt to actioner history + actionerHistory.Add(new ChatMessage(ChatRole.User, actionPrompt)); + + // Load actioner model config + APIConfig config = APIConfig.LoadConfig(ACTIONER_CONFIG); + + if (string.IsNullOrWhiteSpace(config.DeploymentName) || + string.IsNullOrWhiteSpace(config.EndpointURL) || + string.IsNullOrWhiteSpace(config.APIKey)) + { + PluginLogger.NotifyTaskComplete("Action Execution", false); + return "Error: Actioner model not configured"; + } + + // Use the Factory to create the client based on ProviderType + IChatClient baseChatClient = AIClientFactory.CreateClient(config); + + // Collect tools based on configuration + var tools = new List(); + + if (toolConfig.EnableCMDPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new CMDPlugin())); + } + + if (toolConfig.EnablePowerShellPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new PowerShellPlugin())); + } + + if (toolConfig.EnableScreenCapturePlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new ScreenCapturePlugin())); + } + + if (toolConfig.EnableKeyboardPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new KeyboardPlugin())); + } + + if (toolConfig.EnableMousePlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new MousePlugin())); + } + + if (toolConfig.EnableWindowSelectionPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new WindowSelectionPlugin())); + } + + if (toolConfig.EnablePlaywrightPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(PlaywrightPlugin.Instance)); + } + + if (toolConfig.EnableRemoteControl) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new RemoteControlPlugin())); + } + + if (toolConfig.EnableClipboardPlugin) { - var message = enumerator.Current; - if (message.Content == "None") continue; - responseBuilder.Append(message.Content); + tools.AddRange(PluginToolExtractor.ExtractTools(new ClipboardPlugin())); } + + if (toolConfig.EnableFileSystemPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new FileSystemPlugin())); + } + + // Configure chat options with tools + var chatOptions = new ChatOptions + { + Temperature = (float)toolConfig.Temperature, + Tools = tools + }; + + // Build chat client with function invocation if enabled + actionerChat = toolConfig.AutoInvokeKernelFunctions + ? new ChatClientBuilder(baseChatClient).UseFunctionInvocation().Build() + : baseChatClient; + + // Update loading message to show we're now processing the response + PluginLogger.StopLoadingIndicator(); + PluginLogger.StartLoadingIndicator("AI response"); + + // Process the response with streaming + var responseBuilder = new StringBuilder(); + try + { + await foreach (var update in actionerChat.GetStreamingResponseAsync(actionerHistory, chatOptions)) + { + if (update.Text != null) + { + responseBuilder.Append(update.Text); + } + } + } + catch (Exception ex) when (ex.Message.Contains("Unknown ChatFinishReason") || ex.Message.Contains("function_call_filter")) + { + PluginLogger.LogInfo("Actioner", "ExecuteAction", $"Ignored known SDK finish reason error: {ex.Message}"); + } + + // Task completed successfully + PluginLogger.NotifyTaskComplete("Action Execution", true); + + var response = responseBuilder.ToString(); + + // Add assistant response to history + if (!string.IsNullOrEmpty(response)) + { + actionerHistory.Add(new ChatMessage(ChatRole.Assistant, response)); + } + + return response; } - finally + catch (Exception ex) { - await enumerator.DisposeAsync(); + // Task failed + PluginLogger.NotifyTaskComplete("Action Execution", false); + return $"Error: {ex.Message}"; } - - string response = responseBuilder.ToString(); - - return responseBuilder.ToString(); } - internal void SetChatHistory(List chatHistory) + internal void SetChatHistory(System.Collections.Generic.List chatHistory) { actionerHistory.Clear(); foreach (var message in chatHistory) { if (message.Author == "You") { - actionerHistory.AddUserMessage(message.Content); + actionerHistory.Add(new ChatMessage(ChatRole.User, message.Content)); } else if (message.Author == "AI") { - actionerHistory.AddAssistantMessage(message.Content); + actionerHistory.Add(new ChatMessage(ChatRole.Assistant, message.Content)); } } + + // Also update the simple agent and LM Studio chat history + simpleAgentActioner.SetChatHistory(chatHistory); + lmStudioActioner.SetChatHistory(chatHistory); } } } \ No newline at end of file diff --git a/FlowVision/lib/Classes/ai/AgentCoordinator.cs b/FlowVision/lib/Classes/ai/AgentCoordinator.cs new file mode 100644 index 0000000..e69de29 diff --git a/FlowVision/lib/Classes/ai/AgentRole.cs b/FlowVision/lib/Classes/ai/AgentRole.cs new file mode 100644 index 0000000..e1d49f8 --- /dev/null +++ b/FlowVision/lib/Classes/ai/AgentRole.cs @@ -0,0 +1,12 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace FlowVision.lib.Classes +{ + /// + /// Represents the different roles in a multi-agent workflow + /// +} diff --git a/FlowVision/lib/Classes/ai/Github_Actioner.cs b/FlowVision/lib/Classes/ai/Github_Actioner.cs index 855683d..7bd44be 100644 --- a/FlowVision/lib/Classes/ai/Github_Actioner.cs +++ b/FlowVision/lib/Classes/ai/Github_Actioner.cs @@ -1,4 +1,5 @@ -using System; +using System; +using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; @@ -7,42 +8,40 @@ using Azure.AI.Inference; using FlowVision.lib.Plugins; using FlowVision.Properties; -using Microsoft.Extensions.DependencyInjection; -using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.ChatCompletion; -using Microsoft.SemanticKernel.Connectors.AzureAIInference; -using Microsoft.SemanticKernel.Connectors.OpenAI; +using Microsoft.Extensions.AI; +using Azure.AI.OpenAI; +using ChatMessage = Microsoft.Extensions.AI.ChatMessage; +using ChatRole = Microsoft.Extensions.AI.ChatRole; namespace FlowVision.lib.Classes { public class Github_Actioner { - private IChatCompletionService _chat; - private ChatHistory _history; - private Kernel _kernel; - private RichTextBox _output; + private IChatClient _chat; + private List _history; + private RichTextBox _output; private const string ACTIONER_CONFIG = "github"; private const string TOOL_CONFIG = "toolsconfig"; public Github_Actioner(RichTextBox outputTextBox) { _output = outputTextBox; - _history = new ChatHistory(); + _history = new List(); // Load tool config to get system message ToolConfig toolConfig = ToolConfig.LoadConfig(TOOL_CONFIG); // Set system message from config - _history.AddSystemMessage(toolConfig.SystemPrompt); + _history.Add(new ChatMessage(ChatRole.System, toolConfig.ActionerSystemPrompt)); } public async Task ExecuteAction(string actionPrompt) { - _history.AddUserMessage(actionPrompt); + _history.Add(new ChatMessage(ChatRole.User, actionPrompt)); // 1. Load your GitHub‑Models config APIConfig config = APIConfig.LoadConfig(ACTIONER_CONFIG); - ToolConfig toolConfig = ToolConfig.LoadConfig(ACTIONER_CONFIG); + ToolConfig toolConfig = ToolConfig.LoadConfig(TOOL_CONFIG); if (string.IsNullOrWhiteSpace(config.EndpointURL) || string.IsNullOrWhiteSpace(config.APIKey) || @@ -58,57 +57,45 @@ public async Task ExecuteAction(string actionPrompt) new AzureKeyCredential(config.APIKey) // your GITHUB_TOKEN ); - // 3. Wire it into Semantic Kernel - var builder = Kernel.CreateBuilder(); -#pragma warning disable SKEXP0070 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. - builder.AddAzureAIInferenceChatCompletion( - modelId: config.DeploymentName, // e.g. "openai/gpt-4.1" - chatClient: chatClient - ); -#pragma warning restore SKEXP0070 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. - - // 4. Add your tool‑plugins - builder.Plugins.AddFromType(); - builder.Plugins.AddFromType(); - builder.Plugins.AddFromType(); - builder.Plugins.AddFromType(); - builder.Plugins.AddFromType(); + // 3. Convert to IChatClient using Microsoft.Extensions.AI + var aiChatClient = chatClient.AsIChatClient(config.DeploymentName); - // 5. Inject the output box for plugins that need it - builder.Services.AddSingleton(_output); + // 4. Add tool plugins using helper + var tools = new List(); + + tools.AddRange(PluginToolExtractor.ExtractTools(new CMDPlugin())); + tools.AddRange(PluginToolExtractor.ExtractTools(new PowerShellPlugin())); + tools.AddRange(PluginToolExtractor.ExtractTools(new ScreenCapturePlugin())); + tools.AddRange(PluginToolExtractor.ExtractTools(new KeyboardPlugin())); + tools.AddRange(PluginToolExtractor.ExtractTools(new MousePlugin())); - // 6. Build and grab IChatCompletionService - _kernel = builder.Build(); - _chat = _kernel.GetRequiredService(); + // 5. Enable function invocation + _chat = new ChatClientBuilder(aiChatClient).UseFunctionInvocation().Build(); - // 7. Define your PromptExecutionSettings - var settings = new OpenAIPromptExecutionSettings + // 6. Define your chat options + var options = new ChatOptions { - Temperature = 0.2, - ToolCallBehavior = ToolCallBehavior.AutoInvokeKernelFunctions + Temperature = 0.2f, + Tools = tools }; - // Process the response + // Process the response with streaming var responseBuilder = new StringBuilder(); - // Uncomment for Open AI - var responseStream = _chat.GetStreamingChatMessageContentsAsync(_history, settings, _kernel); - var enumerator = responseStream.GetAsyncEnumerator(); - try + await foreach (var update in _chat.GetStreamingResponseAsync(_history, options)) { - while (await enumerator.MoveNextAsync()) + if (update.Text != null) { - var message = enumerator.Current; - if (message.Content == "None") continue; - responseBuilder.Append(message.Content); + responseBuilder.Append(update.Text); } } - finally - { - await enumerator.DisposeAsync(); - } string response = responseBuilder.ToString(); - + + // Add response to history + if (!string.IsNullOrEmpty(response)) + { + _history.Add(new ChatMessage(ChatRole.Assistant, response)); + } return response; } diff --git a/FlowVision/lib/Classes/ai/LMStudioActioner.cs b/FlowVision/lib/Classes/ai/LMStudioActioner.cs new file mode 100644 index 0000000..3d0f242 --- /dev/null +++ b/FlowVision/lib/Classes/ai/LMStudioActioner.cs @@ -0,0 +1,286 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +using System.Text; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +using System.Text; +using System.Threading.Tasks; +using System.Windows.Forms; +using FlowVision.lib.Plugins; +using Microsoft.Extensions.AI; +using OpenAI; +using ChatMessage = Microsoft.Extensions.AI.ChatMessage; +using FlowVision; // Required for Form1 + +namespace FlowVision.lib.Classes +{ + /// + /// Local AI Actioner using LM Studio (OpenAI-compatible API) + /// + public class LMStudioActioner + { + private IChatClient actionerChat; + private List actionerHistory; + private const string TOOL_CONFIG = "toolsconfig"; + private LMStudioConfig lmStudioConfig; + + public LMStudioActioner(Form1.PluginOutputHandler outputHandler) + { + actionerHistory = new List(); + lmStudioConfig = LMStudioConfig.LoadConfig(); + + // Create a RichTextBox that isn't displayed but used for logging + var hiddenTextBox = new RichTextBox { Visible = false }; + + // Initialize the plugin logger + if (Application.OpenForms.Count > 0 && Application.OpenForms[0] is Form1 mainForm) + { + Action addMessageAction = mainForm.AddMessage; + PluginLogger.Initialize(hiddenTextBox, addMessageAction); + } + else + { + PluginLogger.Initialize(hiddenTextBox); + } + + // Override the UpdateUI method to use our output handler + if (outputHandler != null) + { + hiddenTextBox.TextChanged += (sender, e) => + { + string newText = hiddenTextBox.Lines.LastOrDefault(); + if (!string.IsNullOrEmpty(newText)) + { + outputHandler(newText); + } + }; + } + + // Start remote control server if enabled + ToolConfig toolConfig = ToolConfig.LoadConfig(TOOL_CONFIG); + if (toolConfig.EnableRemoteControl) + { + RemoteControlPlugin.SetCommandHandler(async cmd => await ExecuteAction(cmd)); + RemoteControlPlugin.StartServer(toolConfig.RemoteControlPort); + } + } + + public async Task ExecuteAction(string actionPrompt) + { + // Reload configs + lmStudioConfig = LMStudioConfig.LoadConfig(); + ToolConfig toolConfig = ToolConfig.LoadConfig(TOOL_CONFIG); + + // Generate tool descriptions if dynamic prompts are enabled + string toolDescriptions = toolConfig.DynamicToolPrompts + ? "\n\n" + ToolDescriptionGenerator.GetToolDescriptions(toolConfig) + : string.Empty; + + // Notify that we're starting the action execution + PluginLogger.NotifyTaskStart("LM Studio Action Execution", "Processing your request with local AI"); + PluginLogger.StartLoadingIndicator("request"); + + try + { + // Add system message to actioner history + actionerHistory.Add(new ChatMessage(ChatRole.System, toolConfig.ActionerSystemPrompt + toolDescriptions)); + + // Add action prompt with explicit instruction to EXECUTE + string enhancedPrompt = $@"{actionPrompt} + +IMPORTANT REMINDER: +1. DO NOT just observe and describe - you must EXECUTE the action! +2. After GetPageElements(), you MUST continue to actually click/type/interact +3. Follow ALL steps: Observe → Plan → EXECUTE → Verify +4. Do not stop until you've performed the actual action requested"; + + actionerHistory.Add(new ChatMessage(ChatRole.User, enhancedPrompt)); + + // Verify LM Studio is enabled and configured + if (!lmStudioConfig.Enabled) + { + PluginLogger.NotifyTaskComplete("LM Studio Action Execution", false); + return "Error: LM Studio integration is not enabled. Please enable it in the configuration."; + } + + if (string.IsNullOrWhiteSpace(lmStudioConfig.EndpointURL)) + { + PluginLogger.NotifyTaskComplete("LM Studio Action Execution", false); + return "Error: LM Studio endpoint not configured. Default is http://localhost:1234/v1"; + } + + // Check for invalid config load + if (!lmStudioConfig.IsValid) + { + PluginLogger.NotifyTaskComplete("LM Studio Action Execution", false); + return "Error: LM Studio configuration file is corrupt. Please go to settings and re-save the configuration."; + } + + // Validate URI format before attempting to create client + if (!Uri.TryCreate(lmStudioConfig.EndpointURL, UriKind.Absolute, out _)) + { + PluginLogger.NotifyTaskComplete("LM Studio Action Execution", false); + return $"Error: Invalid Endpoint URL '{lmStudioConfig.EndpointURL}'. Please correct it in settings."; + } + + // Create OpenAI client pointing to LM Studio + // LM Studio provides an OpenAI-compatible API + var openAIClient = new OpenAIClient(new System.ClientModel.ApiKeyCredential(lmStudioConfig.APIKey), new OpenAIClientOptions + { + Endpoint = new Uri(lmStudioConfig.EndpointURL) + }); + + // Get the chat client and convert to IChatClient + var chatClient = openAIClient.GetChatClient(lmStudioConfig.ModelName); + IChatClient baseChatClient = chatClient.AsIChatClient(); + + // Collect tools based on configuration + var tools = new List(); + + if (toolConfig.EnableCMDPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new CMDPlugin())); + } + + if (toolConfig.EnablePowerShellPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new PowerShellPlugin())); + } + + if (toolConfig.EnableScreenCapturePlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new ScreenCapturePlugin())); + } + + if (toolConfig.EnableKeyboardPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new KeyboardPlugin())); + } + + if (toolConfig.EnableMousePlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new MousePlugin())); + } + + if (toolConfig.EnableWindowSelectionPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new WindowSelectionPlugin())); + } + + if (toolConfig.EnablePlaywrightPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(PlaywrightPlugin.Instance)); + } + + if (toolConfig.EnableRemoteControl) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new RemoteControlPlugin())); + } + + // Configure chat options with tools + var chatOptions = new ChatOptions + { + Temperature = 1.0f, // Fixed at 1.0 for LM Studio models + MaxOutputTokens = lmStudioConfig.MaxTokens, + Tools = tools + }; + + // Build chat client with function invocation if enabled + if (toolConfig.AutoInvokeKernelFunctions) + { + // Use function invocation with default behavior + // The middleware will automatically loop and call tools + actionerChat = new ChatClientBuilder(baseChatClient) + .UseFunctionInvocation() + .Build(); + + PluginLogger.LogInfo("LMStudioActioner", "ExecuteAction", + "Function invocation enabled - will auto-invoke tools"); + } + else + { + actionerChat = baseChatClient; + } + + // Update loading message to show we're now processing the response + PluginLogger.StopLoadingIndicator(); + PluginLogger.StartLoadingIndicator("Local AI response"); + + // Process the response with streaming + var responseBuilder = new StringBuilder(); + await foreach (var update in actionerChat.GetStreamingResponseAsync(actionerHistory, chatOptions)) + { + if (update.Text != null) + { + responseBuilder.Append(update.Text); + } + } + + // Task completed successfully + PluginLogger.NotifyTaskComplete("LM Studio Action Execution", true); + + var response = responseBuilder.ToString(); + + // Log the response for debugging + PluginLogger.LogInfo("LMStudioActioner", "ExecuteAction", + $"AI Response: {(string.IsNullOrEmpty(response) ? "(empty)" : $"{response.Length} chars")}"); + + // Add assistant response to history + if (!string.IsNullOrEmpty(response)) + { + actionerHistory.Add(new ChatMessage(ChatRole.Assistant, response)); + } + else + { + // If response is empty, check if there were tool calls and provide a default response + var defaultResponse = "I executed the requested action successfully."; + PluginLogger.LogInfo("LMStudioActioner", "ExecuteAction", + "Empty response from model, using default response"); + actionerHistory.Add(new ChatMessage(ChatRole.Assistant, defaultResponse)); + return defaultResponse; + } + + return response; + } + catch (Exception ex) + { + // Task failed + PluginLogger.NotifyTaskComplete("LM Studio Action Execution", false); + + // Provide helpful error messages + if (ex.Message.Contains("Connection refused") || ex.Message.Contains("No connection could be made")) + { + return $"Error: Cannot connect to LM Studio at {lmStudioConfig.EndpointURL}. " + + "Please make sure LM Studio is running and the server is started (click 'Start Server' in LM Studio)."; + } + + return $"Error: {ex.Message}\n\nMake sure LM Studio is running with a model loaded and the server is started."; + } + } + + public void SetChatHistory(System.Collections.Generic.List chatHistory) + { + actionerHistory.Clear(); + foreach (var message in chatHistory) + { + if (message.Author == "You") + { + actionerHistory.Add(new ChatMessage(ChatRole.User, message.Content)); + } + else if (message.Author == "AI") + { + actionerHistory.Add(new ChatMessage(ChatRole.Assistant, message.Content)); + } + } + } + + public void ClearHistory() + { + actionerHistory.Clear(); + } + } +} diff --git a/FlowVision/lib/Classes/ai/MultiAgentActioner.cs b/FlowVision/lib/Classes/ai/MultiAgentActioner.cs new file mode 100644 index 0000000..b258b12 --- /dev/null +++ b/FlowVision/lib/Classes/ai/MultiAgentActioner.cs @@ -0,0 +1,800 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using System.Windows.Forms; +using FlowVision.lib.Plugins; +using Microsoft.Extensions.AI; +using FlowVision.lib.Classes.ai; +using Azure.AI.OpenAI; // Needed for some types if referenced, but Factory returns IChatClient +using Azure; // Needed for AzureKeyCredential if strictly typed, but Factory handles it. +using FlowVision; // Required for Form1 + +namespace FlowVision.lib.Classes +{ + /// + /// Multi-agent actioner that coordinates between a coordinator, planner agent and an execution agent + /// + public class MultiAgentActioner + { + private IChatClient coordinatorChat; + private IChatClient plannerChat; + private IChatClient actionerChat; + private List coordinatorHistory; + private List plannerHistory; + private List actionerHistory; + private AgentCoordinator agentCoordinator; + + // Configuration constants + private const string TOOL_CONFIG = "toolsconfig"; + private const string ACTIONER_CONFIG = "actioner"; + + // ToolConfig instance to store and access configuration + private ToolConfig toolConfig; + + public MultiAgentActioner(Form1.PluginOutputHandler outputHandler) + { + coordinatorHistory = new List(); + plannerHistory = new List(); + actionerHistory = new List(); + agentCoordinator = new AgentCoordinator(); + + // Load tool configuration when the MultiAgentActioner is initialized + toolConfig = ToolConfig.LoadConfig(TOOL_CONFIG); + + // Create a RichTextBox that isn't displayed but used for logging + var hiddenTextBox = new RichTextBox { Visible = false }; + + // Initialize the plugin logger + if (Application.OpenForms.Count > 0 && Application.OpenForms[0] is Form1 mainForm) + { + Action addMessageAction = mainForm.AddMessage; + PluginLogger.Initialize(hiddenTextBox, addMessageAction); + } + else + { + PluginLogger.Initialize(hiddenTextBox); + } + + // Override the UpdateUI method to use our output handler + if (outputHandler != null) + { + hiddenTextBox.TextChanged += (sender, e) => + { + string newText = hiddenTextBox.Lines.LastOrDefault(); + if (!string.IsNullOrEmpty(newText)) + { + outputHandler(newText); + } + }; + } + } + + public async Task ExecuteAction(string actionPrompt) + { + // Reload tool configuration to ensure we have the most recent settings + toolConfig = ToolConfig.LoadConfig(TOOL_CONFIG); + + // Build dynamic tool description segment if enabled + string toolDescriptions = toolConfig.DynamicToolPrompts + ? "\n\n" + ToolDescriptionGenerator.GetToolDescriptions(toolConfig) + : string.Empty; + + PluginLogger.NotifyTaskStart("Multi-Agent Action", "Planning and executing your request"); + PluginLogger.StartLoadingIndicator("coordination"); + + try + { + // Configure coordinator first + coordinatorHistory.Add(new ChatMessage(ChatRole.System, toolConfig.CoordinatorSystemPrompt + toolDescriptions)); + coordinatorHistory.Add(new ChatMessage(ChatRole.User, actionPrompt)); + + // Configure planner for later use + plannerHistory.Clear(); + plannerHistory.Add(new ChatMessage(ChatRole.System, toolConfig.PlannerSystemPrompt + toolDescriptions)); + + + // Configure actioner for later use + actionerHistory.Clear(); + actionerHistory.Add(new ChatMessage(ChatRole.System, toolConfig.ActionerSystemPrompt + toolDescriptions)); + + + // Clear agent coordinator message history + agentCoordinator.Clear(); + agentCoordinator.AddMessage(AgentRole.User, AgentRole.Coordinator, + "USER_REQUEST", actionPrompt); + + // Load model configurations - use either custom configs or default + APIConfig coordinatorConfig = toolConfig.UseCustomCoordinatorConfig + ? APIConfig.LoadConfig(toolConfig.CoordinatorConfigName) + : APIConfig.LoadConfig(ACTIONER_CONFIG); + + APIConfig plannerConfig = toolConfig.UseCustomPlannerConfig + ? APIConfig.LoadConfig(toolConfig.PlannerConfigName) + : APIConfig.LoadConfig(ACTIONER_CONFIG); + + APIConfig actionerConfig = toolConfig.UseCustomActionerConfig + ? APIConfig.LoadConfig(toolConfig.ActionerConfigName) + : APIConfig.LoadConfig(ACTIONER_CONFIG); + + // Verify coordinator config + if (string.IsNullOrWhiteSpace(coordinatorConfig.DeploymentName) || + string.IsNullOrWhiteSpace(coordinatorConfig.EndpointURL) || + string.IsNullOrWhiteSpace(coordinatorConfig.APIKey)) + { + PluginLogger.NotifyTaskComplete("Multi-Agent Action", false); + return "Error: Coordinator model not configured"; + } + + // Verify planner config + if (string.IsNullOrWhiteSpace(plannerConfig.DeploymentName) || + string.IsNullOrWhiteSpace(plannerConfig.EndpointURL) || + string.IsNullOrWhiteSpace(plannerConfig.APIKey)) + { + PluginLogger.NotifyTaskComplete("Multi-Agent Action", false); + return "Error: Planner model not configured"; + } + + // Verify actioner config + if (string.IsNullOrWhiteSpace(actionerConfig.DeploymentName) || + string.IsNullOrWhiteSpace(actionerConfig.EndpointURL) || + string.IsNullOrWhiteSpace(actionerConfig.APIKey)) + { + PluginLogger.NotifyTaskComplete("Multi-Agent Action", false); + return "Error: Actioner model not configured"; + } + + // Setup clients using the Factory (supports Azure, Gemini, etc.) + coordinatorChat = AIClientFactory.CreateClient(coordinatorConfig); + plannerChat = AIClientFactory.CreateClient(plannerConfig); + + // Setup actioner client base + IChatClient actionerChatBase = AIClientFactory.CreateClient(actionerConfig); + + // Collect tools based on configuration + var tools = new List(); + + if (toolConfig.EnableCMDPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new CMDPlugin())); + } + + if (toolConfig.EnablePowerShellPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new PowerShellPlugin())); + } + + if (toolConfig.EnableScreenCapturePlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new ScreenCapturePlugin())); + } + + if (toolConfig.EnableKeyboardPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new KeyboardPlugin())); + } + + if (toolConfig.EnableMousePlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new MousePlugin())); + } + + if (toolConfig.EnableWindowSelectionPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new WindowSelectionPlugin())); + } + + if (toolConfig.EnablePlaywrightPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(PlaywrightPlugin.Instance)); + } + + if (toolConfig.EnableRemoteControl) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new RemoteControlPlugin())); + } + + if (toolConfig.EnableClipboardPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new ClipboardPlugin())); + } + + if (toolConfig.EnableFileSystemPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new FileSystemPlugin())); + } + + // Setup actioner with function invocation using builder pattern + actionerChat = new ChatClientBuilder(actionerChatBase).UseFunctionInvocation().Build(); + + // Get initial coordination from coordinator agent + PluginLogger.StopLoadingIndicator(); + PluginLogger.LogPluginUsage("🗣️ Coordinating request..."); + PluginLogger.StartLoadingIndicator("coordination"); + + var coordinatorOptions = new ChatOptions + { + Temperature = (float)toolConfig.Temperature + }; + + var plannerOptions = new ChatOptions + { + Temperature = (float)toolConfig.Temperature + }; + + var actionerOptions = new ChatOptions + { + Temperature = (float)toolConfig.Temperature, + Tools = tools + }; + + // Get the initial coordination + string coordinatorResponse = await GetAgentResponseAsync(coordinatorChat, coordinatorHistory, coordinatorOptions); + PluginLogger.LogPluginUsage("🎯 Coordinator Assessment:\n" + coordinatorResponse); + + agentCoordinator.AddMessage(AgentRole.Coordinator, AgentRole.Planner, + "COORDINATION_RESPONSE", coordinatorResponse); + + // Send the task to the planner + plannerHistory.Add(new ChatMessage(ChatRole.User, coordinatorResponse)); + + PluginLogger.StopLoadingIndicator(); + PluginLogger.LogPluginUsage("🧠 Planning approach..."); + PluginLogger.StartLoadingIndicator("planning"); + + // Get the initial plan + string plan = await GetAgentResponseAsync(plannerChat, plannerHistory, plannerOptions); + PluginLogger.LogPluginUsage("📝 Initial Plan:\n" + plan); + + + agentCoordinator.AddMessage(AgentRole.Planner, AgentRole.Actioner, + + "PLAN_RESPONSE", plan); + + // Now execute the plan step by step + bool isComplete = false; + int maxIterations = 25; + int currentIteration = 0; + string finalResult = ""; + List executionResults = new List(); + + // Novel Feature: Focus Tracking + var windowTracker = new WindowSelectionPlugin(); + + // Novel Feature: Reflection Memory + // Stores lessons learned from failures to prevent repeating mistakes + List lessonsLearned = new List(); + int consecutiveFailures = 0; + const int MAX_CONSECUTIVE_FAILURES = 3; + + while (!isComplete && currentIteration < maxIterations) + { + currentIteration++; + PluginLogger.LogPluginUsage($"⚙️ Step {currentIteration}/{maxIterations}"); + + // Capture pre-action state + string preActionWindow = windowTracker.GetForegroundWindowInfo(); + + // Build context with lessons learned (if any) + string lessonsContext = lessonsLearned.Count > 0 + ? $"\n\n⚠️ LESSONS FROM PREVIOUS ATTEMPTS:\n" + string.Join("\n", lessonsLearned.Select((l, i) => $"{i + 1}. {l}")) + : ""; + + // Ask actioner to perform the current step with enhanced instructions + actionerHistory.Add(new ChatMessage(ChatRole.User, + $"Execute this step:\n\n{plan}\n\n" + + $"Current progress: {currentIteration}/{maxIterations} steps" + + lessonsContext + "\n\n" + + "CRITICAL RULES:\n" + + "1. Use GetPageElements() to find selectors for elements\n" + + "2. Use ClickElement with waitForNavigation=\"true\" for submit buttons\n" + + "3. If an action fails, STOP and explain exactly what went wrong\n" + + "4. Report: [ACTION TAKEN] + [RESULT OBSERVED] + [SUCCESS/FAILURE]")); + + agentCoordinator.AddMessage(AgentRole.Planner, AgentRole.Actioner, + "EXECUTION_REQUEST", plan); + + PluginLogger.StopLoadingIndicator(); + PluginLogger.LogPluginUsage("🔧 Executing step..."); + PluginLogger.StartLoadingIndicator("executing"); + + // Get actioner response with tools + string executionResult = await GetAgentResponseAsync(actionerChat, actionerHistory, actionerOptions); + + // ═══════════════════════════════════════════════════════════════ + // ANTI-HALLUCINATION CHECK + // Detect if actioner claimed to do something without tool calls + // ═══════════════════════════════════════════════════════════════ + bool possibleHallucination = DetectHallucination(executionResult); + if (possibleHallucination) + { + PluginLogger.LogPluginUsage("⚠️ Possible hallucination detected - no tool calls found"); + executionResult = "[SYSTEM WARNING: The actioner responded without calling any tools. " + + "This response may be hallucinated. The actioner MUST call tools to perform actions.]\n\n" + + "Original response: " + executionResult; + } + + // Capture post-action state + string postActionWindow = windowTracker.GetForegroundWindowInfo(); + + // Handle empty execution results + if (string.IsNullOrWhiteSpace(executionResult)) + { + executionResult = "[No tool was called. The actioner must call a tool to perform an action.]"; + } + + // Focus Change Alert + if (preActionWindow != postActionWindow) + { + string alert = $"\n\n[SYSTEM ALERT]: Active window focus changed!\n" + + $"Previous: {preActionWindow}\n" + + $"Current: {postActionWindow}\n" + + $"Use the new Handle ({postActionWindow.Split(',')[0]}) for subsequent interactions."; + + executionResult += alert; + PluginLogger.LogInfo("MultiAgentActioner", "ExecuteAction", "Detected window focus change."); + } + + // ═══════════════════════════════════════════════════════════════ + // SELF-REFLECTION LOOP + // Detect failures and learn from them to prevent repetition + // ═══════════════════════════════════════════════════════════════ + bool stepFailed = DetectStepFailure(executionResult) || possibleHallucination; + + if (stepFailed) + { + consecutiveFailures++; + PluginLogger.LogPluginUsage($"⚠️ Step appears to have failed ({consecutiveFailures}/{MAX_CONSECUTIVE_FAILURES})"); + + // Ask the actioner to reflect on what went wrong + actionerHistory.Add(new ChatMessage(ChatRole.User, + "🔍 REFLECTION REQUIRED: The previous step appears to have failed.\n\n" + + "Analyze what went wrong and provide:\n" + + "1. WHAT FAILED: What specific action didn't work?\n" + + "2. WHY IT FAILED: What was the root cause?\n" + + "3. HOW TO FIX: What should be done differently?\n\n" + + "Be specific and actionable.")); + + PluginLogger.LogPluginUsage("🔍 Reflecting on failure..."); + string reflection = await GetAgentResponseAsync(actionerChat, actionerHistory, actionerOptions); + + // Extract the lesson and add to memory + string lesson = ExtractLesson(reflection, plan); + if (!string.IsNullOrEmpty(lesson)) + { + lessonsLearned.Add(lesson); + PluginLogger.LogPluginUsage($"📚 Lesson learned: {lesson}"); + } + + // Add reflection to execution result + executionResult += $"\n\n[REFLECTION]:\n{reflection}"; + + // If too many consecutive failures, escalate to coordinator + if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) + { + PluginLogger.LogPluginUsage("🚨 Multiple failures detected, requesting coordinator intervention..."); + + coordinatorHistory.Add(new ChatMessage(ChatRole.User, + $"⚠️ INTERVENTION NEEDED: The actioner has failed {consecutiveFailures} times in a row.\n\n" + + $"Original task: {actionPrompt}\n\n" + + $"Current step that keeps failing: {plan}\n\n" + + $"Lessons learned:\n{string.Join("\n", lessonsLearned)}\n\n" + + "Please provide a DIFFERENT approach to complete this task.")); + + string intervention = await GetAgentResponseAsync(coordinatorChat, coordinatorHistory, coordinatorOptions); + + // Reset the planner with the new approach + plannerHistory.Clear(); + plannerHistory.Add(new ChatMessage(ChatRole.System, toolConfig.PlannerSystemPrompt + toolDescriptions)); + plannerHistory.Add(new ChatMessage(ChatRole.User, + $"New approach from coordinator:\n{intervention}\n\n" + + $"Previous lessons learned:\n{string.Join("\n", lessonsLearned)}")); + + consecutiveFailures = 0; + PluginLogger.LogPluginUsage("🔄 Trying new approach from coordinator..."); + } + } + else + { + consecutiveFailures = 0; // Reset on success + } + // ═══════════════════════════════════════════════════════════════ + + // Store the execution result + executionResults.Add(executionResult); + + PluginLogger.LogPluginUsage("📊 Step result:\n" + executionResult); + + agentCoordinator.AddMessage(AgentRole.Actioner, AgentRole.Planner, + "EXECUTION_RESPONSE", executionResult); + + // Manage context window to prevent token overflow + ManageContextWindow(actionerHistory, 12); + ManageContextWindow(plannerHistory, 12); + + // Add the execution result to the planner's history with enhanced prompting + string successIndicator = stepFailed ? "⚠️ STEP FAILED" : "✅ STEP SUCCEEDED"; + + plannerHistory.Add(new ChatMessage(ChatRole.User, + $"{successIndicator}\n\n" + + $"Step {currentIteration} Result:\n{executionResult}\n\n" + + $"Progress: {currentIteration}/{maxIterations} steps\n\n" + + (lessonsLearned.Count > 0 ? $"Lessons learned:\n{string.Join("\n", lessonsLearned)}\n\n" : "") + + "Evaluate and respond with EXACTLY ONE of:\n" + + "A) 'TASK COMPLETED: [summary of what was accomplished]'\n" + + "B) 'NEXT STEP: [specific action to take with exact tool call]'\n" + + "C) 'TASK FAILED: [explanation of why it cannot be completed]'")); + + + PluginLogger.StopLoadingIndicator(); + PluginLogger.LogPluginUsage("🔄 Evaluating progress..."); + PluginLogger.StartLoadingIndicator("planning"); + + // Get planner's evaluation of the result + plan = await GetAgentResponseAsync(plannerChat, plannerHistory, plannerOptions); + + agentCoordinator.AddMessage(AgentRole.Planner, AgentRole.Coordinator, + "STATUS_UPDATE", plan); + + // Check if the task is complete or failed + bool taskCompleted = plan.IndexOf("TASK COMPLETED", StringComparison.OrdinalIgnoreCase) >= 0; + bool taskFailed = plan.IndexOf("TASK FAILED", StringComparison.OrdinalIgnoreCase) >= 0; + + if (taskCompleted || taskFailed) + { + isComplete = true; + + if (taskCompleted) + { + PluginLogger.LogPluginUsage("✅ Task marked as complete by planner"); + } + else + { + PluginLogger.LogPluginUsage("❌ Task marked as failed by planner"); + } + + // Send all execution results to the coordinator for final formatting + string executionSummary = string.Join("\n\n", executionResults); + string lessonsText = lessonsLearned.Count > 0 + ? $"\n\nLessons learned during execution:\n{string.Join("\n", lessonsLearned)}" + : ""; + + coordinatorHistory.Add(new ChatMessage(ChatRole.User, + $"The task has {(taskCompleted ? "been completed" : "failed")} after {currentIteration} steps.\n\n" + + $"Complete execution log:\n{executionSummary}\n\n" + + $"Planner's final message:\n{plan}" + lessonsText + "\n\n" + + "Please provide a clear, user-friendly summary of what was accomplished (or what went wrong). " + + "Include specific results, any important details, and the current state. " + + "Be concise but informative. Do not use technical tags or internal markers." + )); + + PluginLogger.StopLoadingIndicator(); + PluginLogger.LogPluginUsage("📝 Generating user response..."); + PluginLogger.StartLoadingIndicator("coordination"); + + // Get coordinator's final response with detailed results + finalResult = await GetAgentResponseAsync(coordinatorChat, coordinatorHistory, coordinatorOptions); + + // Store this as a completed response + agentCoordinator.AddMessage(AgentRole.Coordinator, AgentRole.User, + "USER_RESPONSE", finalResult); + } + else + { + // Extract just the next step from the plan (remove "NEXT STEP:" prefix if present) + if (plan.IndexOf("NEXT STEP:", StringComparison.OrdinalIgnoreCase) >= 0) + { + int idx = plan.IndexOf("NEXT STEP:", StringComparison.OrdinalIgnoreCase); + plan = plan.Substring(idx + 10).Trim(); + } + PluginLogger.LogPluginUsage($"⏭️ Next step:\n{plan}"); + } + } + + PluginLogger.StopLoadingIndicator(); + + if (isComplete) + { + PluginLogger.NotifyTaskComplete("Multi-Agent Action", true); + return finalResult; + } + else + { + // Compile all execution results into a comprehensive response + string allResults = string.Join("\n\n", executionResults); + + // Get coordinator to explain the incomplete task status with the results + coordinatorHistory.Add(new ChatMessage(ChatRole.User, + $"The task could not be completed within {maxIterations} iterations, but here are the results so far:\n\n{allResults}\n\n" + + "Please provide a detailed response for the user that contains all the information gathered, even though the task wasn't fully completed." + )); + + PluginLogger.LogPluginUsage("⚠️ Maximum iterations reached, generating explanation with results..."); + PluginLogger.StartLoadingIndicator("coordination"); + + // Get coordinator's explanation with detailed results + string resultWithExplanation = await GetAgentResponseAsync(coordinatorChat, coordinatorHistory, coordinatorOptions); + + agentCoordinator.AddMessage(AgentRole.Coordinator, AgentRole.User, + "STATUS_UPDATE", resultWithExplanation); + + PluginLogger.StopLoadingIndicator(); + PluginLogger.NotifyTaskComplete("Multi-Agent Action", false); + return resultWithExplanation; + } + } + catch (Exception ex) + { + PluginLogger.StopLoadingIndicator(); + PluginLogger.NotifyTaskComplete("Multi-Agent Action", false); + return $"Error: {ex.Message}"; + } + } + + private async Task GetAgentResponseAsync( + IChatClient chatService, + List history, + ChatOptions options) + { + var responseBuilder = new StringBuilder(); + + try + { + await foreach (var update in chatService.GetStreamingResponseAsync(history, options)) + { + if (update.Text != null) + { + responseBuilder.Append(update.Text); + } + } + } + catch (Exception ex) when (ex.Message.Contains("Unknown ChatFinishReason") || ex.Message.Contains("function_call_filter")) + { + // Swallow known SDK mapping errors for specific provider finish reasons + // This allows us to keep the text generated so far + PluginLogger.LogInfo("MultiAgentActioner", "GetAgentResponseAsync", $"Ignored known SDK finish reason error: {ex.Message}"); + } + + string response = responseBuilder.ToString(); + history.Add(new ChatMessage(ChatRole.Assistant, response)); + + return response; + } + + private void ManageContextWindow(List history, int maxMessages) + { + // Always keep the system prompt (assumed to be at index 0) + if (history.Count <= maxMessages + 1) return; + + // Calculate how many messages to remove + // We want to keep: SystemPrompt (1) + Last N messages + int messagesToRemove = history.Count - (maxMessages + 1); + + if (messagesToRemove > 0) + { + // Remove messages starting from index 1 (preserve System Prompt) + history.RemoveRange(1, messagesToRemove); + } + } + + /// + /// Extracts the first actionable step from the planner's plan. + /// Looks for lines that mention a tool/plugin or a direct action. + /// + private string ExtractActionableStep(string plan) + { + if (string.IsNullOrWhiteSpace(plan)) + return null; + + // Look for lines that mention 'use', 'plugin', or 'tool' + var lines = plan.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries); + foreach (var line in lines) + { + var lower = line.ToLowerInvariant(); + if (lower.Contains("use") && (lower.Contains("plugin") || lower.Contains("tool"))) + { + return line.Trim(); + } + // Also allow direct imperative instructions + if (lower.StartsWith("set ") || lower.StartsWith("capture ") || lower.StartsWith("extract ")) + { + return line.Trim(); + } + } + // Fallback: if plan is a single actionable sentence + if (lines.Length == 1 && lines[0].Length < 200) + return lines[0].Trim(); + + return null; + } + + /// + /// Detects if the actioner hallucinated (claimed to do something without calling tools) + /// + private bool DetectHallucination(string executionResult) + { + if (string.IsNullOrWhiteSpace(executionResult)) + return true; // Empty response = no tool called + + string lower = executionResult.ToLowerInvariant(); + + // CRITICAL: Check for fake tool names that don't exist + string[] fakeTools = new[] + { + "findtextonscreen", "searchtext", "locatetext", "findtext", + "gettext", "readtext", "scanscreen", "analyzescreen", + "task.delay", "wait(", "sleep(", "pause(", + "downloadfile", "savefile", "openfile" + }; + + foreach (var fakeTool in fakeTools) + { + if (lower.Contains(fakeTool)) + { + PluginLogger.LogInfo("MultiAgentActioner", "DetectHallucination", + $"Detected fake tool: {fakeTool}"); + return true; + } + } + + // Check for code block tool calls (model writing code instead of calling functions) + if (executionResult.Contains("```tool_code") || + executionResult.Contains("```python") || + executionResult.Contains("```csharp")) + { + PluginLogger.LogInfo("MultiAgentActioner", "DetectHallucination", + "Detected code block - model writing code instead of calling tools"); + return true; + } + + // Check for fake API responses + if (lower.Contains("the api returned") && + (lower.Contains("'found': true") || lower.Contains("\"found\": true"))) + { + PluginLogger.LogInfo("MultiAgentActioner", "DetectHallucination", + "Detected fake API response"); + return true; + } + + // Signs that a tool was actually called (real tool output markers) + string[] realToolIndicators = new[] + { + "window handle", "windowhandle", "handle:", + "screenshot saved", "captured screen", "ui element #", + "bbox:", "bounding box", "[left", + "process started", "command executed", + "browser launched", "navigated to", + "clicked at", "typed text", "sent key" + }; + + bool hasRealToolOutput = false; + foreach (var indicator in realToolIndicators) + { + if (lower.Contains(indicator)) + { + hasRealToolOutput = true; + break; + } + } + + // Signs of hallucination (claims without evidence) + string[] hallucinationPatterns = new[] + { + "i have successfully", "i've successfully", "i successfully", + "i logged in", "i signed in", "i clicked", "i typed", + "i opened", "i navigated", "i scrolled", "i liked", + "the task is complete", "task completed", "done", + "i performed", "i executed", "i did", + "[action taken]", "[result observed]", "[success]" + }; + + foreach (var pattern in hallucinationPatterns) + { + if (lower.Contains(pattern) && !hasRealToolOutput) + { + return true; + } + } + + return false; + } + + /// + /// Detects if a step failed based on the execution result + /// + private bool DetectStepFailure(string executionResult) + { + if (string.IsNullOrWhiteSpace(executionResult)) + return false; + + string lower = executionResult.ToLowerInvariant(); + + // Explicit failure indicators + string[] failurePatterns = new[] + { + "error:", "exception:", "failed to", "could not", "unable to", + "not found", "does not exist", "permission denied", "access denied", + "timeout", "timed out", "no such", "invalid", "cannot find", + "null reference", "object reference", "index out of range", + "window handle is invalid", "element not found", "selector not found", + "no matching element", "click failed", "type failed" + }; + + foreach (var pattern in failurePatterns) + { + if (lower.Contains(pattern)) + return true; + } + + // Check for empty or non-actionable results + if (executionResult.Trim().Length < 10) + return false; // Too short to determine, assume success + + return false; + } + + /// + /// Extracts a concise lesson from a reflection response + /// + private string ExtractLesson(string reflection, string failedStep) + { + if (string.IsNullOrWhiteSpace(reflection)) + return $"Step '{TruncateString(failedStep, 50)}' failed - reason unknown"; + + // Look for key phrases that indicate lessons + var lines = reflection.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries); + + foreach (var line in lines) + { + var lower = line.ToLowerInvariant(); + // Look for fix recommendations + if (lower.Contains("should") || lower.Contains("instead") || + lower.Contains("fix:") || lower.Contains("solution:") || + lower.Contains("correct way") || lower.Contains("need to")) + { + return TruncateString(line.Trim(), 150); + } + } + + // Fallback: summarize the failure + return $"Avoid: {TruncateString(failedStep, 50)} - {TruncateString(reflection, 100)}"; + } + + /// + /// Truncates a string to a maximum length with ellipsis + /// + private string TruncateString(string input, int maxLength) + { + if (string.IsNullOrEmpty(input) || input.Length <= maxLength) + return input; + return input.Substring(0, maxLength - 3) + "..."; + } + + public void SetChatHistory(System.Collections.Generic.List chatHistory) + { + // Set up coordinator history with system prompt + coordinatorHistory.Clear(); + string toolDescriptions = toolConfig.DynamicToolPrompts + ? "\n\n" + ToolDescriptionGenerator.GetToolDescriptions(toolConfig) + : string.Empty; + coordinatorHistory.Add(new ChatMessage(ChatRole.System, toolConfig.CoordinatorSystemPrompt + toolDescriptions)); + + // Set up planner history with system prompt + plannerHistory.Clear(); + plannerHistory.Add(new ChatMessage(ChatRole.System, toolConfig.PlannerSystemPrompt + toolDescriptions)); + + foreach (var message in chatHistory) + { + if (message.Author == "You") + { + coordinatorHistory.Add(new ChatMessage(ChatRole.User, message.Content)); + } + else if (message.Author == "AI") + { + coordinatorHistory.Add(new ChatMessage(ChatRole.Assistant, message.Content)); + } + } + } + } +} diff --git a/FlowVision/lib/Classes/ai/SimpleAgentActioner.cs b/FlowVision/lib/Classes/ai/SimpleAgentActioner.cs new file mode 100644 index 0000000..a1f7cb9 --- /dev/null +++ b/FlowVision/lib/Classes/ai/SimpleAgentActioner.cs @@ -0,0 +1,393 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using System.Windows.Forms; +using FlowVision.lib.Plugins; +using Microsoft.Extensions.AI; +using FlowVision.lib.Classes.ai; + +namespace FlowVision.lib.Classes +{ + /// + /// Simple single-agent actioner with tool verification. + /// No complex multi-agent coordination - just one agent that uses tools directly. + /// Tracks actual tool invocations to prevent hallucination. + /// + public class SimpleAgentActioner + { + private IChatClient agentChat; + private List chatHistory; + private ToolConfig toolConfig; + + // Track tool invocations + private static int _toolCallCount = 0; + private static readonly object _toolLock = new object(); + + private const string TOOL_CONFIG = "toolsconfig"; + private const string ACTIONER_CONFIG = "actioner"; + private const int MAX_TURNS = 15; + + public SimpleAgentActioner(Form1.PluginOutputHandler outputHandler) + { + chatHistory = new List(); + + var hiddenTextBox = new RichTextBox { Visible = false }; + + if (Application.OpenForms.Count > 0 && Application.OpenForms[0] is Form1 mainForm) + { + PluginLogger.Initialize(hiddenTextBox, mainForm.AddMessage); + } + else + { + PluginLogger.Initialize(hiddenTextBox); + } + + if (outputHandler != null) + { + hiddenTextBox.TextChanged += (sender, e) => + { + string newText = hiddenTextBox.Lines.LastOrDefault(); + if (!string.IsNullOrEmpty(newText)) + { + outputHandler(newText); + } + }; + } + } + + /// + /// Reset tool call counter before each agent turn + /// + public static void ResetToolCallCounter() + { + lock (_toolLock) + { + _toolCallCount = 0; + } + } + + /// + /// Increment tool call counter (called by plugins) + /// + public static void IncrementToolCallCounter() + { + lock (_toolLock) + { + _toolCallCount++; + } + } + + /// + /// Get current tool call count + /// + public static int GetToolCallCount() + { + lock (_toolLock) + { + return _toolCallCount; + } + } + + public async Task ExecuteAction(string userRequest) + { + toolConfig = ToolConfig.LoadConfig(TOOL_CONFIG); + + PluginLogger.NotifyTaskStart("Agent Task", "Processing your request..."); + + try + { + // Initialize the agent + await InitializeAgent(); + + // Build system prompt + string systemPrompt = BuildSystemPrompt(); + chatHistory.Clear(); + chatHistory.Add(new ChatMessage(ChatRole.System, systemPrompt)); + + // Add user request + chatHistory.Add(new ChatMessage(ChatRole.User, userRequest)); + + // Get available tools + var tools = GetTools(); + var chatOptions = new ChatOptions { Tools = tools }; + + // Agentic loop + StringBuilder conversationLog = new StringBuilder(); + int turn = 0; + + while (turn < MAX_TURNS) + { + turn++; + PluginLogger.LogPluginUsage($"🔄 Turn {turn}/{MAX_TURNS}"); + + // Reset tool counter before this turn + ResetToolCallCounter(); + + // Get agent response (with automatic function invocation) + var responseBuilder = new StringBuilder(); + + try + { + await foreach (var update in agentChat.GetStreamingResponseAsync(chatHistory, chatOptions)) + { + if (update.Text != null) + { + responseBuilder.Append(update.Text); + } + } + } + catch (Exception ex) when (ex.Message.Contains("Unknown ChatFinishReason") || + ex.Message.Contains("function_call")) + { + // Ignore known SDK finish reason errors + PluginLogger.LogInfo("SimpleAgentActioner", "ExecuteAction", + $"Ignored SDK error: {ex.Message}"); + } + + string response = responseBuilder.ToString().Trim(); + + // Log the response + if (!string.IsNullOrEmpty(response)) + { + chatHistory.Add(new ChatMessage(ChatRole.Assistant, response)); + conversationLog.AppendLine($"[Turn {turn}]: {response}"); + PluginLogger.LogPluginUsage($"💬 Agent: {TruncateForLog(response)}"); + } + + // Check how many tools were actually called + int toolsCalled = GetToolCallCount(); + PluginLogger.LogInfo("SimpleAgentActioner", "ExecuteAction", + $"Turn {turn}: {toolsCalled} tools called"); + + // Check for task completion + if (IsTaskComplete(response)) + { + PluginLogger.NotifyTaskComplete("Agent Task", true); + return ExtractFinalResponse(response, conversationLog.ToString()); + } + + // Check for task failure + if (IsTaskFailed(response)) + { + PluginLogger.NotifyTaskComplete("Agent Task", false); + return ExtractFailureResponse(response, conversationLog.ToString()); + } + + // If no tools were called and agent is just talking, prompt to take action + if (toolsCalled == 0 && !string.IsNullOrEmpty(response)) + { + chatHistory.Add(new ChatMessage(ChatRole.User, + "[SYSTEM] You must call tools to perform actions. " + + "Don't just describe what you would do - actually call the tools. " + + "Start by calling GetPageElements() to see what's on the page.")); + } + + // Small delay between turns + await Task.Delay(500); + } + + // Max turns reached + PluginLogger.NotifyTaskComplete("Agent Task", false); + return $"I made progress but couldn't complete the task in {MAX_TURNS} turns.\n\n" + + $"Here's what happened:\n{conversationLog}"; + } + catch (Exception ex) + { + PluginLogger.NotifyTaskComplete("Agent Task", false); + PluginLogger.LogError("SimpleAgentActioner", "ExecuteAction", ex.Message); + return $"Error: {ex.Message}"; + } + } + + private async Task InitializeAgent() + { + // Get API config + var apiConfig = APIConfig.LoadConfig(ACTIONER_CONFIG); + + // Validate config before creating client + if (string.IsNullOrWhiteSpace(apiConfig.EndpointURL) || + string.IsNullOrWhiteSpace(apiConfig.APIKey)) + { + throw new InvalidOperationException( + "API not configured. Please go to Settings and configure your AI provider (Azure OpenAI, OpenAI, LM Studio, or Gemini)."); + } + + // Create chat client using factory + var baseChatClient = AIClientFactory.CreateClient(apiConfig); + + // Wrap with function invocation support + if (toolConfig.AutoInvokeKernelFunctions) + { + agentChat = new ChatClientBuilder(baseChatClient) + .UseFunctionInvocation() + .Build(); + } + else + { + agentChat = baseChatClient; + } + + await Task.CompletedTask; + } + + private string BuildSystemPrompt() + { + return @"You are a Windows computer control agent. You complete tasks by calling tools. + +## CRITICAL RULES + +1. **ALWAYS call tools** - Don't describe actions, perform them +2. **Use Playwright for web tasks** - LaunchBrowser, NavigateTo, ClickElement, TypeText +3. **One action at a time** - Perform action, verify result, continue + +## PLAYWRIGHT WORKFLOW (for web automation) + +1. LaunchBrowser(browserType, headless) - Start browser ('chromium', 'firefox', or 'webkit') +2. NavigateTo(url) - Go to a website +3. ClickElement(selector) - Click using CSS selector (e.g., 'button.submit', '#login', 'a[href*=login]') +4. TypeText(selector, text) - Type into input field +5. GetPageContent() - Get page HTML/text to find elements +6. CloseBrowser() - When done + +## EXAMPLE: Login to a website + +1. LaunchBrowser('chromium', false) +2. NavigateTo('https://example.com/login') +3. TypeText('#username', 'myuser') +4. TypeText('#password', 'mypass') +5. ClickElement('button[type=submit]') +6. GetPageContent() to verify login worked + +## AVAILABLE TOOLS + +**Playwright (Web):** +- LaunchBrowser(browserType, headless) - Start browser +- NavigateTo(url) - Go to URL +- ClickElement(selector) - Click element by CSS selector +- TypeText(selector, text) - Type into input +- GetPageContent() - Get page content +- CloseBrowser() - Close browser + +**System:** +- ExecuteCommand(cmd) - Run shell command +- ListWindowHandles() - Get open windows + +## CSS SELECTOR TIPS + +- By ID: '#loginButton' +- By class: '.submit-btn' +- By tag: 'button', 'input', 'a' +- By attribute: 'input[name=email]', 'a[href*=login]' +- By text (Playwright): 'text=Sign In', 'button:has-text(Submit)' + +## COMPLETION + +When done: TASK COMPLETE: [what you did] +If stuck: TASK FAILED: [why]"; + } + + private IList GetTools() + { + var tools = new List(); + + // Playwright is the primary tool for web automation + if (toolConfig.EnablePlaywrightPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(PlaywrightPlugin.Instance)); + } + + if (toolConfig.EnableWindowSelectionPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new WindowSelectionPlugin())); + } + + if (toolConfig.EnableMousePlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new MousePlugin())); + } + + if (toolConfig.EnableKeyboardPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new KeyboardPlugin())); + } + + if (toolConfig.EnableCMDPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new CMDPlugin())); + } + + if (toolConfig.EnablePowerShellPlugin) + { + tools.AddRange(PluginToolExtractor.ExtractTools(new PowerShellPlugin())); + } + + return tools; + } + + private bool IsTaskComplete(string response) + { + if (string.IsNullOrEmpty(response)) return false; + var lower = response.ToLowerInvariant(); + return lower.Contains("task complete:") || + lower.Contains("task completed:") || + lower.Contains("successfully completed"); + } + + private bool IsTaskFailed(string response) + { + if (string.IsNullOrEmpty(response)) return false; + var lower = response.ToLowerInvariant(); + return lower.Contains("task failed:") || + lower.Contains("cannot complete") || + lower.Contains("unable to complete"); + } + + private string ExtractFinalResponse(string lastResponse, string conversationLog) + { + // Try to extract just the completion message + var lines = lastResponse.Split('\n'); + foreach (var line in lines) + { + if (line.ToLowerInvariant().Contains("task complete")) + { + return line.Trim(); + } + } + return lastResponse; + } + + private string ExtractFailureResponse(string lastResponse, string conversationLog) + { + var lines = lastResponse.Split('\n'); + foreach (var line in lines) + { + if (line.ToLowerInvariant().Contains("task failed")) + { + return line.Trim(); + } + } + return $"Task could not be completed.\n\nDetails:\n{lastResponse}"; + } + + private string TruncateForLog(string text, int maxLength = 100) + { + if (string.IsNullOrEmpty(text)) return ""; + if (text.Length <= maxLength) return text.Replace("\n", " "); + return text.Substring(0, maxLength).Replace("\n", " ") + "..."; + } + + public void SetChatHistory(List history) + { + chatHistory.Clear(); + foreach (var msg in history) + { + if (msg.Author == "You") + chatHistory.Add(new ChatMessage(ChatRole.User, msg.Content)); + else if (msg.Author == "AI") + chatHistory.Add(new ChatMessage(ChatRole.Assistant, msg.Content)); + } + } + } +} diff --git a/FlowVision/lib/Classes/ai/ToolDescriptionGenerator.cs b/FlowVision/lib/Classes/ai/ToolDescriptionGenerator.cs new file mode 100644 index 0000000..a2fb876 --- /dev/null +++ b/FlowVision/lib/Classes/ai/ToolDescriptionGenerator.cs @@ -0,0 +1,152 @@ +using System; +using System.Collections.Generic; +using System.Text; +using FlowVision.lib.Plugins; + +namespace FlowVision.lib.Classes +{ + /// + /// Generates dynamic descriptions of enabled plugins and tools based on the current configuration + /// + public static class ToolDescriptionGenerator + { + /// + /// Creates a formatted description of all enabled tools in the provided configuration + /// + /// The tool configuration containing enabled/disabled status of plugins + /// A formatted string describing all enabled tools and their capabilities + public static string GetToolDescriptions(ToolConfig toolConfig) + { + var sb = new StringBuilder(); + sb.AppendLine("# Available Tools and Plugins"); + sb.AppendLine("You have access to the following tools:"); + sb.AppendLine(); + + var enabledTools = new List(); + + // Add descriptions for each enabled plugin + if (toolConfig.EnableCMDPlugin) + { + enabledTools.Add(GetCMDPluginDescription()); + } + + if (toolConfig.EnablePowerShellPlugin) + { + enabledTools.Add(GetPowerShellPluginDescription()); + } + + if (toolConfig.EnableScreenCapturePlugin) + { + enabledTools.Add(GetScreenCapturePluginDescription()); + } + + if (toolConfig.EnableKeyboardPlugin) + { + enabledTools.Add(GetKeyboardPluginDescription()); + } + + if (toolConfig.EnableMousePlugin) + { + enabledTools.Add(GetMousePluginDescription()); + } + + if (toolConfig.EnableWindowSelectionPlugin) + { + enabledTools.Add(GetWindowSelectionPluginDescription()); + } + + if (toolConfig.EnablePlaywrightPlugin) + { + enabledTools.Add(GetPlaywrightPluginDescription()); + } + + if (toolConfig.EnableRemoteControl) + { + enabledTools.Add(GetRemoteControlPluginDescription()); + } + + // Join all descriptions with double line breaks + sb.Append(string.Join("\n\n", enabledTools)); + + if (enabledTools.Count == 0) + { + sb.AppendLine("No tools are currently enabled in the configuration."); + } + else + { + sb.AppendLine(); + sb.AppendLine(); + sb.AppendLine("You can call these tools to help answer the user's questions or perform tasks. " + + "Make sure to use the exact tool names and parameters as specified above."); + } + + return sb.ToString(); + } + + private static string GetCMDPluginDescription() + { + return "## CMDPlugin\n" + + "- **ExecuteCommand(string command)**: Executes a Windows command prompt (CMD) command and returns the output. " + + "Use this for file operations, system information queries, and basic system administration on Windows."; + } + + private static string GetPowerShellPluginDescription() + { + return "## PowerShellPlugin\n" + + "- **ExecuteScript(string script)**: Executes a PowerShell script and returns the output. " + + "Use this for more advanced system administration tasks, registry operations, and accessing Windows APIs."; + } + + private static string GetScreenCapturePluginDescription() + { + return "## ScreenCapturePlugin\n" + + "- **ForegroundSelect(handleString)**: Brings specified window to foreground.\n" + + "- **ListWindowHandles()**: Returns list of available window handles, titles, and process names."; + } + + private static string GetKeyboardPluginDescription() + { + return "## KeyboardPlugin\n" + + "- **SendKeys(string keys)**: Sends keystrokes to the active application. " + + "- **SendHotkey(string modifiers, string key)**: Sends modifier key combinations (e.g., Ctrl+C). " + + "- **TypeText(string text)**: Types text into the active application."; + } + + private static string GetMousePluginDescription() + { + return "## MousePlugin\n" + + "- **MouseMove(int x, int y)**: Moves the mouse cursor to the specified screen coordinates. " + + "- **MouseClick(int x, int y)**: Performs a mouse click at the specified coordinates. " + + "- **MouseRightClick(int x, int y)**: Performs a right-click at the specified coordinates."; + } + + private static string GetWindowSelectionPluginDescription() + { + return "## WindowSelectionPlugin\n" + + "- **ListWindows()**: Lists all open windows with their titles and handles. " + + "- **SwitchToWindow(string windowTitle)**: Brings the specified window to the foreground. " + + "- **CloseWindow(string windowTitle)**: Closes the specified window. " + + "- **MaximizeWindow(string windowTitle)**: Maximizes the specified window."; + } + + private static string GetPlaywrightPluginDescription() + { + return "## PlaywrightPlugin\n" + + "- **LaunchBrowser()**: Launches a new browser instance for web automation. " + + "- **NavigateToUrl(string url, string waitUntil='load')**: " + + "Navigates to the URL with optional wait strategy. " + + "- **ExecuteScript(string script)**: Runs JavaScript in the browser context. " + + "- **CaptureScreenshot()**: Takes a screenshot of the current page. " + + "- **GetPageContent()**: Gets the HTML content of the current page. " + + "- **CloseBrowser()**: Closes the browser."; + } + + private static string GetRemoteControlPluginDescription() + { + return "## RemoteControlPlugin\n" + + "- **StartServer(int port)**: Starts a remote control server on the specified port. " + + "- **StopServer()**: Stops the remote control server. " + + "- **SendCommand(string command)**: Sends a command to connected clients."; + } + } +} diff --git a/FlowVision/lib/Plugins/CMDPlugin.cs b/FlowVision/lib/Plugins/CMDPlugin.cs index d73e2ad..895aaf0 100644 --- a/FlowVision/lib/Plugins/CMDPlugin.cs +++ b/FlowVision/lib/Plugins/CMDPlugin.cs @@ -1,25 +1,30 @@ -using System; +using System; using System.Collections.Generic; using System.ComponentModel; using System.Diagnostics; using System.Linq; using System.Text; +using System.Threading; using System.Threading.Tasks; using System.Windows.Forms; using FlowVision.lib.Classes; -using Microsoft.SemanticKernel; namespace FlowVision.lib.Plugins { class CMDPlugin { + private static readonly string[] BrowserProcesses = { "msedge", "chrome", "firefox", "iexplore", "opera" }; + private static readonly string[] NonWaitingCommands = { "start ", "explorer " }; - [KernelFunction, Description("Executes a CMD command and returns the output.")] + [Description("Executes a CMD command and returns the output.")] public async Task ExecuteCommand([Description("Command Prompt Command")] string command) { // Log the plugin usage PluginLogger.LogPluginUsage("CMDPlugin", "ExecuteCommand", command); + // Detect if this is likely launching an external application + bool shouldWaitForExit = !ShouldSkipWaiting(command); + try { var startInfo = new ProcessStartInfo() @@ -35,13 +40,32 @@ public async Task ExecuteCommand([Description("Command Prompt Command")] using (var process = new Process { StartInfo = startInfo }) { process.Start(); - string output = await process.StandardOutput.ReadToEndAsync(); - string errors = await process.StandardError.ReadToEndAsync(); - process.WaitForExit(); + + // Use tasks to read output with timeout + var outputTask = process.StandardOutput.ReadToEndAsync(); + var errorTask = process.StandardError.ReadToEndAsync(); + + if (shouldWaitForExit) + { + // Wait for process with reasonable timeout + if (!process.WaitForExit(10000)) // 10 second timeout + { + return $"Command started but may still be running: {command}"; + } + } + else + { + // For non-waiting commands, give a short time to collect initial output + await Task.Delay(500); + return $"Command launched: {command}"; + } + + // Complete reading output + string output = await outputTask; + string errors = await errorTask; if (!string.IsNullOrWhiteSpace(errors)) { - return $"Error: {errors}"; } @@ -53,5 +77,60 @@ public async Task ExecuteCommand([Description("Command Prompt Command")] return $"Exception: {ex.Message}"; } } + + [Description("Launches an external application without waiting for it to exit.")] + public string LaunchApplication([Description("Application to launch")] string application, + [Description("Arguments for the application")] string arguments = "") + { + // Log the plugin usage + PluginLogger.LogPluginUsage("CMDPlugin", "LaunchApplication", $"{application} {arguments}"); + + try + { + var startInfo = new ProcessStartInfo() + { + FileName = application, + Arguments = arguments, + UseShellExecute = true, + CreateNoWindow = false + }; + + Process.Start(startInfo); + return $"Successfully launched: {application}"; + } + catch (Exception ex) + { + return $"Failed to launch {application}: {ex.Message}"; + } + } + + private bool ShouldSkipWaiting(string command) + { + if (string.IsNullOrWhiteSpace(command)) + return false; + + // Check if command contains any browser names (case-insensitive) + if (BrowserProcesses.Any(browser => + command.IndexOf(browser, StringComparison.OrdinalIgnoreCase) >= 0)) + { + return true; + } + + // Check for commands that typically launch external processes + if (NonWaitingCommands.Any(cmd => + command.StartsWith(cmd, StringComparison.OrdinalIgnoreCase))) + { + return true; + } + + // Check for URLs or web addresses + if (command.IndexOf("http://", StringComparison.OrdinalIgnoreCase) >= 0 || + command.IndexOf("https://", StringComparison.OrdinalIgnoreCase) >= 0) + { + return true; + } + + return false; + } } } diff --git a/FlowVision/lib/Plugins/ClipboardPlugin.cs b/FlowVision/lib/Plugins/ClipboardPlugin.cs new file mode 100644 index 0000000..e1ce087 --- /dev/null +++ b/FlowVision/lib/Plugins/ClipboardPlugin.cs @@ -0,0 +1,59 @@ +using System; +using System.ComponentModel; +using System.Windows.Forms; +using FlowVision.lib.Classes; + +namespace FlowVision.lib.Plugins +{ + internal class ClipboardPlugin + { + [Description("Sets the system clipboard text content")] + public void SetClipboardText(string text) + { + PluginLogger.LogPluginUsage("ClipboardPlugin", "SetClipboardText", text); + + if (Application.OpenForms.Count > 0) + { + Application.OpenForms[0].Invoke(new Action(() => { + try + { + Clipboard.SetText(text); + } + catch (Exception ex) + { + PluginLogger.LogError("ClipboardPlugin", "SetClipboardText", ex.Message); + } + })); + } + } + + [Description("Gets the current text content from the system clipboard")] + public string GetClipboardText() + { + PluginLogger.LogPluginUsage("ClipboardPlugin", "GetClipboardText"); + string clipboardText = ""; + + if (Application.OpenForms.Count > 0) + { + clipboardText = (string)Application.OpenForms[0].Invoke(new Func(() => { + try + { + if (Clipboard.ContainsText()) + { + return Clipboard.GetText(); + } + else + { + return "[Clipboard is empty or contains non-text data]"; + } + } + catch (Exception ex) + { + return $"Error reading clipboard: {ex.Message}"; + } + })); + } + return clipboardText; + } + } +} diff --git a/FlowVision/lib/Plugins/FileSystemPlugin.cs b/FlowVision/lib/Plugins/FileSystemPlugin.cs new file mode 100644 index 0000000..4400e84 --- /dev/null +++ b/FlowVision/lib/Plugins/FileSystemPlugin.cs @@ -0,0 +1,81 @@ +using System; +using System.ComponentModel; +using System.IO; +using System.Linq; +using FlowVision.lib.Classes; + +namespace FlowVision.lib.Plugins +{ + internal class FileSystemPlugin + { + [Description("Gets the current working directory of the application")] + public string GetCurrentDirectory() + { + PluginLogger.LogPluginUsage("FileSystemPlugin", "GetCurrentDirectory"); + return Directory.GetCurrentDirectory(); + } + + [Description("Lists files and directories in the specified path. Returns first 50 entries.")] + public string ListDirectory(string path) + { + PluginLogger.LogPluginUsage("FileSystemPlugin", "ListDirectory", path); + try + { + if (!Directory.Exists(path)) return $"Directory not found: {path}"; + + var dirs = Directory.GetDirectories(path).Select(d => $"[DIR] {Path.GetFileName(d)}"); + var files = Directory.GetFiles(path).Select(f => Path.GetFileName(f)); + + var all = dirs.Concat(files).Take(50); + return string.Join("\n", all); + } + catch (Exception ex) + { + return $"Error: {ex.Message}"; + } + } + + [Description("Checks if a file exists at the specified path")] + public bool FileExists(string path) + { + PluginLogger.LogPluginUsage("FileSystemPlugin", "FileExists", path); + return File.Exists(path); + } + + [Description("Reads the content of a text file (max 2000 chars)")] + public string ReadFile(string path) + { + PluginLogger.LogPluginUsage("FileSystemPlugin", "ReadFile", path); + try + { + if (!File.Exists(path)) return "File not found"; + + string content = File.ReadAllText(path); + if (content.Length > 2000) + { + return content.Substring(0, 2000) + "\n...[Truncated]..."; + } + return content; + } + catch (Exception ex) + { + return $"Error: {ex.Message}"; + } + } + + [Description("Writes text content to a file. Overwrites if exists.")] + public string WriteFile(string path, string content) + { + PluginLogger.LogPluginUsage("FileSystemPlugin", "WriteFile", path); + try + { + File.WriteAllText(path, content); + return $"Successfully wrote to {path}"; + } + catch (Exception ex) + { + return $"Error writing file: {ex.Message}"; + } + } + } +} diff --git a/FlowVision/lib/Plugins/KeyboardPlugin.cs b/FlowVision/lib/Plugins/KeyboardPlugin.cs index ca8665d..05747e1 100644 --- a/FlowVision/lib/Plugins/KeyboardPlugin.cs +++ b/FlowVision/lib/Plugins/KeyboardPlugin.cs @@ -1,13 +1,13 @@ -using System; +using System; using System.Collections.Generic; using System.ComponentModel; using System.Linq; using System.Runtime.InteropServices; using System.Text; +using System.Threading; using System.Threading.Tasks; using System.Windows.Forms; using FlowVision.lib.Classes; -using Microsoft.SemanticKernel; namespace FlowVision.lib.Plugins { @@ -23,7 +23,22 @@ internal class KeyboardPlugin [DllImport("user32.dll", SetLastError = true)] private static extern void keybd_event(byte bVk, byte bScan, uint dwFlags, UIntPtr dwExtraInfo); - [KernelFunction, Description("Used to interact with the keyboard")] + [DllImport("user32.dll", SetLastError = true)] + private static extern bool SetForegroundWindow(IntPtr hWnd); + + [DllImport("user32.dll", SetLastError = true)] + private static extern IntPtr GetForegroundWindow(); + + [DllImport("user32.dll")] + private static extern bool AttachThreadInput(uint idAttach, uint idAttachTo, bool fAttach); + + [DllImport("kernel32.dll")] + private static extern uint GetCurrentThreadId(); + + [DllImport("user32.dll", SetLastError = true)] + private static extern uint GetWindowThreadProcessId(IntPtr hWnd, out uint lpdwProcessId); + + [Description("Used to interact with the keyboard")] public async Task SendKey(string keyCombo) { PluginLogger.LogPluginUsage("KeyboardPlugin", "SendKey", keyCombo); @@ -38,7 +53,45 @@ public async Task SendKey(string keyCombo) } } - [KernelFunction, Description("Enter Key")] + [Description("Send keyboard input to a specific window by handle. keys format: standard SendKeys (e.g. 'Hello', '{ENTER}', '^c').")] + public async Task SendKeyToWindow(string windowHandleString, string keyCombo) + { + PluginLogger.LogPluginUsage("KeyboardPlugin", "SendKeyToWindow", + $"window={windowHandleString}, keys={keyCombo}"); + + try + { + IntPtr windowHandle = new IntPtr(Convert.ToInt32(windowHandleString)); + + // Bring window to foreground with proper focus + if (!BringWindowToForegroundWithFocus(windowHandle)) + { + PluginLogger.LogError("KeyboardPlugin", "SendKeyToWindow", + "Failed to bring window to foreground"); + return false; + } + + // Wait a bit for the window to become active + // Increased to 500ms to ensure slower apps (like Notepad startup) are ready + await Task.Delay(500); + + // Send the keys + SendKeys.SendWait(keyCombo); + + PluginLogger.LogInfo("KeyboardPlugin", "SendKeyToWindow", + $"✓ Successfully sent keys to window {windowHandleString}"); + + return true; + } + catch (Exception ex) + { + PluginLogger.LogError("KeyboardPlugin", "SendKeyToWindow", + $"Error: {ex.Message}"); + return false; + } + } + + [Description("Enter Key")] public async Task EnterKey() { PluginLogger.LogPluginUsage("KeyboardPlugin", "EnterKey", "ENTER"); @@ -53,7 +106,13 @@ public async Task EnterKey() } } - [KernelFunction, Description("Ctrl + Letter")] + [Description("Send Enter key to a specific window by handle")] + public async Task EnterKeyToWindow(string windowHandleString) + { + return await SendKeyToWindow(windowHandleString, "{ENTER}"); + } + + [Description("Ctrl + Letter")] public async Task CtrlKey(string letter) { PluginLogger.LogPluginUsage("KeyboardPlugin", "CtrlKey", letter); @@ -68,9 +127,91 @@ public async Task CtrlKey(string letter) } } + [Description("Send Ctrl+Letter combination to a specific window by handle")] + public async Task CtrlKeyToWindow(string windowHandleString, string letter) + { + return await SendKeyToWindow(windowHandleString, $"^({letter})"); + } + + [DllImport("user32.dll")] + private static extern bool ShowWindow(IntPtr hWnd, int nCmdShow); + + [DllImport("user32.dll")] + private static extern IntPtr SetFocus(IntPtr hWnd); + + private const int SW_RESTORE = 9; + + // ... (existing imports) + + /// + /// Brings a window to the foreground and ensures it has focus using multiple techniques + /// + private bool BringWindowToForegroundWithFocus(IntPtr hWnd) + { + if (hWnd == IntPtr.Zero) + return false; + + try + { + // 1. Force window restore if minimized + ShowWindow(hWnd, SW_RESTORE); + + // Get the current foreground window + IntPtr currentForeground = GetForegroundWindow(); + + // Get thread IDs + uint currentThreadId = GetCurrentThreadId(); + uint targetThreadId = GetWindowThreadProcessId(hWnd, out _); + uint foregroundThreadId = GetWindowThreadProcessId(currentForeground, out _); + + // Attach to the foreground thread to bypass restrictions + bool needsDetach = false; + if (currentThreadId != foregroundThreadId) + { + AttachThreadInput(currentThreadId, foregroundThreadId, true); + needsDetach = true; + } + + // Also attach to the target thread if it's different + if (targetThreadId != currentThreadId && targetThreadId != foregroundThreadId) + { + AttachThreadInput(currentThreadId, targetThreadId, true); + } + + // Try to set foreground window + bool success = SetForegroundWindow(hWnd); + + // Force focus to the specific handle + SetFocus(hWnd); + + // Detach if we attached + if (needsDetach) + { + AttachThreadInput(currentThreadId, foregroundThreadId, false); + } + if (targetThreadId != currentThreadId && targetThreadId != foregroundThreadId) + { + AttachThreadInput(currentThreadId, targetThreadId, false); + } + + // Give it a moment to process + Thread.Sleep(50); + + // Verify the window is now in foreground + IntPtr newForeground = GetForegroundWindow(); + return newForeground == hWnd; + } + catch (Exception ex) + { + PluginLogger.LogError("KeyboardPlugin", "BringWindowToForegroundWithFocus", + $"Error: {ex.Message}"); + return false; + } + } + /* * - *[KernelFunction, Description("Alt + F4")] + *[Description("Alt + F4")] public async Task AltFFour() { FlowVision.lib.Classes.PluginLogger.LogPluginUsage("KeyboardPlugin", "AltFFour", "Alt + F4"); @@ -89,7 +230,7 @@ public async Task AltFFour() */ /* - [KernelFunction, Description("Opens the Run dialog (Windows+R)")] + [Description("Opens the Run dialog (Windows+R)")] public async Task OpenRunDialog() { FlowVision.lib.Classes.PluginLogger.LogPluginUsage("KeyboardPlugin", "OpenRunDialog", "Windows+R"); diff --git a/FlowVision/lib/Plugins/MousePlugin.cs b/FlowVision/lib/Plugins/MousePlugin.cs index 31d491f..cb8faee 100644 --- a/FlowVision/lib/Plugins/MousePlugin.cs +++ b/FlowVision/lib/Plugins/MousePlugin.cs @@ -1,10 +1,9 @@ -using System; +using System; using System.ComponentModel; using System.Runtime.InteropServices; using System.Threading.Tasks; using System.Windows.Forms; using FlowVision.lib.Classes; -using Microsoft.SemanticKernel; namespace FlowVision.lib.Plugins { @@ -19,20 +18,49 @@ internal class MousePlugin [DllImport("user32.dll", SetLastError = true)] private static extern bool GetWindowRect(IntPtr hWnd, out RECT lpRect); + [DllImport("user32.dll", SetLastError = true)] + private static extern bool SetForegroundWindow(IntPtr hWnd); + + [DllImport("user32.dll", SetLastError = true)] + private static extern IntPtr GetForegroundWindow(); + + [DllImport("user32.dll")] + private static extern bool AttachThreadInput(uint idAttach, uint idAttachTo, bool fAttach); + + [DllImport("kernel32.dll")] + private static extern uint GetCurrentThreadId(); + + [DllImport("user32.dll", SetLastError = true)] + private static extern uint GetWindowThreadProcessId(IntPtr hWnd, out uint lpdwProcessId); + + [DllImport("user32.dll")] + private static extern bool ShowWindow(IntPtr hWnd, int nCmdShow); + + [DllImport("user32.dll")] + private static extern IntPtr SetFocus(IntPtr hWnd); + + private const int SW_RESTORE = 9; private const uint MOUSEEVENTF_LEFTDOWN = 0x02; private const uint MOUSEEVENTF_LEFTUP = 0x04; private const uint MOUSEEVENTF_RIGHTDOWN = 0x08; private const uint MOUSEEVENTF_RIGHTUP = 0x10; - [KernelFunction, Description("Clicks at the specified normalized bounding box coordinates on a specific window handle.")] - public async Task ClickOnWindow(string windowHandleString, double[] bBox, bool leftClick, int clickTimes) + [Description("Clicks at the specified normalized bounding box coordinates on a specific window handle. Box is [x1, y1, x2, y2].")] + public async Task ClickOnWindow(string windowHandleString, double x1, double y1, double x2, double y2, bool leftClick, int clickTimes) { // Log the plugin usage PluginLogger.LogPluginUsage("MousePlugin", "ClickOnWindow", - $"window={windowHandleString}, pos={string.Join(",", bBox)}, leftClick={leftClick}, times={clickTimes}"); + $"window={windowHandleString}, box=[{x1},{y1},{x2},{y2}], leftClick={leftClick}"); IntPtr windowHandle = new IntPtr(Convert.ToInt32(windowHandleString)); + // Ensure window is visible and focused + if (!BringWindowToForegroundWithFocus(windowHandle)) + { + PluginLogger.LogError("MousePlugin", "ClickOnWindow", "Failed to focus window"); + return false; + } + if (!GetWindowRect(windowHandle, out RECT rc)) { throw new InvalidOperationException("Failed to get window rectangle."); @@ -41,16 +69,64 @@ public async Task ClickOnWindow(string windowHandleString, double[] bBox, int windowWidth = rc.Right - rc.Left; int windowHeight = rc.Bottom - rc.Top; - // Calculate absolute position based on bounding box (normalized) - int x = rc.Left + (int)((bBox[0] + bBox[2]) / 2 * windowWidth); - int y = rc.Top + (int)((bBox[1] + bBox[3]) / 2 * windowHeight); + // Calculate center of bounding box + double centerX = (x1 + x2) / 2; + double centerY = (y1 + y2) / 2; + + int x, y; + + // Check if coordinates are normalized (0-1 range) + bool isNormalized = (x1 >= 0 && x1 <= 1.0 && y1 >= 0 && y1 <= 1.0 && + x2 >= 0 && x2 <= 1.0 && y2 >= 0 && y2 <= 1.0); + + if (isNormalized) + { + // Normalized: multiply by window size + x = rc.Left + (int)(centerX * windowWidth); + y = rc.Top + (int)(centerY * windowHeight); + } + else + { + // Window-relative pixel coordinates: add window origin + x = rc.Left + (int)centerX; + y = rc.Top + (int)centerY; + } + + PluginLogger.LogInfo("MousePlugin", "ClickOnWindow", + $"bbox center ({centerX:F0}, {centerY:F0}) + window ({rc.Left}, {rc.Top}) -> screen ({x}, {y})"); + + if (!SetCursorPos(x, y)) + { + throw new InvalidOperationException("Failed to set cursor position."); + } + + // Increased delay to allow UI to register hover state + await Task.Delay(200); + + for (int i = 0; i < clickTimes; i++) + { + SimulateClick(x, y, leftClick); + await Task.Delay(50); + } + + return true; + } + + [Description("Clicks at screen coordinates. Use with GetPageElements() to find elements.")] + public async Task ClickAtScreenCoordinates(double x1, double y1, double x2, double y2, bool leftClick, int clickTimes) + { + int x = (int)((x1 + x2) / 2); + int y = (int)((y1 + y2) / 2); + + PluginLogger.LogPluginUsage("MousePlugin", "ClickAtScreenCoordinates", + $"box=[{x1},{y1},{x2},{y2}], center=({x},{y}), leftClick={leftClick}"); if (!SetCursorPos(x, y)) { throw new InvalidOperationException("Failed to set cursor position."); } - await Task.Delay(100); + await Task.Delay(200); for (int i = 0; i < clickTimes; i++) { @@ -61,13 +137,20 @@ public async Task ClickOnWindow(string windowHandleString, double[] bBox, return true; } - [KernelFunction, Description("uses scroll wheel on a specific window handle.")] + [Description("uses scroll wheel on a specific window handle.")] public async Task ScrollOnWindow(string windowHandleString, int scrollAmount) { // Log the plugin usage PluginLogger.LogPluginUsage("MousePlugin", "ScrollOnWindow", $"window={windowHandleString}, amount={scrollAmount}"); + IntPtr windowHandle = new IntPtr(Convert.ToInt32(windowHandleString)); + + if (!BringWindowToForegroundWithFocus(windowHandle)) + { + return false; + } + if (!GetWindowRect(windowHandle, out RECT rc)) { throw new InvalidOperationException("Failed to get window rectangle."); @@ -78,7 +161,7 @@ public async Task ScrollOnWindow(string windowHandleString, int scrollAmou { throw new InvalidOperationException("Failed to set cursor position."); } - await Task.Delay(100); + await Task.Delay(200); mouse_event(0x0800, 0, 0, (uint)scrollAmount, UIntPtr.Zero); return true; } @@ -92,6 +175,51 @@ private void SimulateClick(int x, int y, bool leftClick) mouse_event(up, (uint)x, (uint)y, 0, UIntPtr.Zero); } + /// + /// Brings a window to the foreground and ensures it has focus using multiple techniques + /// + private bool BringWindowToForegroundWithFocus(IntPtr hWnd) + { + if (hWnd == IntPtr.Zero) return false; + + try + { + ShowWindow(hWnd, SW_RESTORE); + IntPtr currentForeground = GetForegroundWindow(); + if (currentForeground == hWnd) return true; + + uint currentThreadId = GetCurrentThreadId(); + uint targetThreadId = GetWindowThreadProcessId(hWnd, out _); + uint foregroundThreadId = GetWindowThreadProcessId(currentForeground, out _); + + bool needsDetach = false; + if (currentThreadId != foregroundThreadId) + { + AttachThreadInput(currentThreadId, foregroundThreadId, true); + needsDetach = true; + } + if (targetThreadId != currentThreadId && targetThreadId != foregroundThreadId) + { + AttachThreadInput(currentThreadId, targetThreadId, true); + } + + bool success = SetForegroundWindow(hWnd); + SetFocus(hWnd); + + if (needsDetach) AttachThreadInput(currentThreadId, foregroundThreadId, false); + if (targetThreadId != currentThreadId && targetThreadId != foregroundThreadId) + AttachThreadInput(currentThreadId, targetThreadId, false); + + System.Threading.Thread.Sleep(100); + return GetForegroundWindow() == hWnd; + } + catch (Exception ex) + { + PluginLogger.LogError("MousePlugin", "BringWindowToForegroundWithFocus", $"Error: {ex.Message}"); + return false; + } + } + [StructLayout(LayoutKind.Sequential)] private struct RECT { diff --git a/FlowVision/lib/Plugins/PlaywrightPlugin.cs b/FlowVision/lib/Plugins/PlaywrightPlugin.cs new file mode 100644 index 0000000..21e9595 --- /dev/null +++ b/FlowVision/lib/Plugins/PlaywrightPlugin.cs @@ -0,0 +1,1513 @@ +using System; +using System.ComponentModel; +using System.Diagnostics; +using System.IO; +using System.Threading.Tasks; +using FlowVision.lib.Classes; +using Microsoft.Playwright; +using System.Collections.Generic; +using System.Threading; +using System.Text.Json; + +namespace FlowVision.lib.Plugins +{ + /// + /// Playwright plugin for browser automation within FlowVision. + /// Uses singleton pattern to maintain browser state across multiple calls. + /// + internal class PlaywrightPlugin + { + // Singleton instance to maintain state across calls + private static PlaywrightPlugin _instance; + private static readonly object _lock = new object(); + + // Instance variables that maintain state + private IPlaywright _playwright; + private IBrowser _browser; + private IBrowserContext _context; + private IPage _page; + private bool _initialized = false; + private readonly SemaphoreSlim _semaphore = new SemaphoreSlim(1, 1); + private string _currentSessionId = "default"; + private bool _useSession = true; + private bool _autoSaveSession = true; // Auto-save after actions + + /// + /// Gets the singleton instance of PlaywrightPlugin + /// + public static PlaywrightPlugin Instance + { + get + { + if (_instance == null) + { + lock (_lock) + { + if (_instance == null) + { + _instance = new PlaywrightPlugin(); + } + } + } + return _instance; + } + } + + // Private constructor to enforce singleton pattern + private PlaywrightPlugin() + { + } + + // Public constructor for backward compatibility (delegates to singleton) + public PlaywrightPlugin(bool useSingleton) : this() + { + // This allows new PlaywrightPlugin() to work but still use the singleton + } + + /// + /// Gets whether a browser instance is currently active. + /// + [Description("Checks if a browser is already running")] + public string IsBrowserActive() + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "IsBrowserActive"); + + if (_browser != null) + { + return $"Yes, a browser is currently active. You can continue using the existing browser without launching a new one."; + } + else + { + return "No browser is currently active. You need to call LaunchBrowser first."; + } + } + + /// + /// Gets information about the current Playwright status. + /// + [Description("Gets detailed information about the current browser status")] + public string GetBrowserStatus() + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "GetBrowserStatus"); + + var status = new Dictionary + { + { "PlaywrightInitialized", _initialized ? "Yes" : "No" }, + { "BrowserActive", _browser != null ? "Yes" : "No" }, + { "CurrentSessionId", _currentSessionId }, + { "SessionPersistenceEnabled", _useSession ? "Yes" : "No" } + }; + + return JsonSerializer.Serialize(status, new JsonSerializerOptions { WriteIndented = true }); + } + + /// + /// Initializes the Playwright environment if not already done. + /// Automatically installs browsers if they are missing. + /// + private async Task InitializePlaywrightAsync() + { + if (_initialized) return; + + await _semaphore.WaitAsync(); + + try + { + if (!_initialized) + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "Initialize"); + PluginLogger.NotifyTaskStart("Playwright initialization", "Setting up browser automation environment"); + + // Set the driver path to the bundled .playwright folder + string appDir = AppDomain.CurrentDomain.BaseDirectory; + string driverPath = Path.Combine(appDir, ".playwright"); + if (Directory.Exists(driverPath)) + { + Environment.SetEnvironmentVariable("PLAYWRIGHT_DRIVER_PATH", driverPath); + PluginLogger.LogInfo("PlaywrightPlugin", "Initialize", $"Set PLAYWRIGHT_DRIVER_PATH to {driverPath}"); + } + + // Check if browsers are installed, if not install them + await EnsureBrowsersInstalledAsync(); + + _playwright = await Microsoft.Playwright.Playwright.CreateAsync(); + _initialized = true; + + PluginLogger.NotifyTaskComplete("Playwright initialization"); + } + } + catch (Exception ex) + { + PluginLogger.NotifyTaskComplete("Playwright initialization", false); + throw new Exception($"Failed to initialize Playwright: {ex.Message}", ex); + } + finally + { + _semaphore.Release(); + } + } + + /// + /// Ensures Playwright browsers are installed. Downloads them if missing. + /// + private async Task EnsureBrowsersInstalledAsync() + { + // Check if chromium browser exists in the default location + string playwrightBrowsersPath = Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), + "ms-playwright"); + + // Always set the browsers path environment variable + Environment.SetEnvironmentVariable("PLAYWRIGHT_BROWSERS_PATH", playwrightBrowsersPath); + PluginLogger.LogInfo("PlaywrightPlugin", "EnsureBrowsersInstalled", $"Set PLAYWRIGHT_BROWSERS_PATH to {playwrightBrowsersPath}"); + + bool browsersExist = Directory.Exists(playwrightBrowsersPath) && + Directory.GetDirectories(playwrightBrowsersPath, "chromium-*").Length > 0; + + if (browsersExist) + { + PluginLogger.LogInfo("PlaywrightPlugin", "EnsureBrowsersInstalled", "Playwright browsers already installed"); + return; + } + + PluginLogger.NotifyTaskStart("Browser download", "Downloading Chromium browser (first-time setup)"); + PluginLogger.LogInfo("PlaywrightPlugin", "EnsureBrowsersInstalled", "Installing Playwright browsers..."); + + try + { + // Find the playwright.ps1 script in the application directory + string appDir = AppDomain.CurrentDomain.BaseDirectory; + string playwrightScript = Path.Combine(appDir, "playwright.ps1"); + + // Alternative: use the .playwright folder structure + string playwrightCmd = Path.Combine(appDir, ".playwright", "package", "cli.js"); + string nodeExe = Path.Combine(appDir, ".playwright", "node", "win32_x64", "node.exe"); + + ProcessStartInfo psi; + + if (File.Exists(nodeExe) && File.Exists(playwrightCmd)) + { + // Use bundled node and playwright CLI + psi = new ProcessStartInfo + { + FileName = nodeExe, + Arguments = $"\"{playwrightCmd}\" install chromium", + UseShellExecute = false, + RedirectStandardOutput = true, + RedirectStandardError = true, + CreateNoWindow = true, + WorkingDirectory = appDir + }; + // Set browsers path for the install process + psi.EnvironmentVariables["PLAYWRIGHT_BROWSERS_PATH"] = Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), + "ms-playwright"); + } + else if (File.Exists(playwrightScript)) + { + // Use playwright.ps1 script + psi = new ProcessStartInfo + { + FileName = "powershell.exe", + Arguments = $"-ExecutionPolicy Bypass -File \"{playwrightScript}\" install chromium", + UseShellExecute = false, + RedirectStandardOutput = true, + RedirectStandardError = true, + CreateNoWindow = true, + WorkingDirectory = appDir + }; + } + else + { + // Fallback: try using npx or global playwright + psi = new ProcessStartInfo + { + FileName = "cmd.exe", + Arguments = "/c npx playwright install chromium", + UseShellExecute = false, + RedirectStandardOutput = true, + RedirectStandardError = true, + CreateNoWindow = true, + WorkingDirectory = appDir + }; + } + + using (var process = new Process { StartInfo = psi }) + { + var outputBuilder = new System.Text.StringBuilder(); + var errorBuilder = new System.Text.StringBuilder(); + + process.OutputDataReceived += (sender, e) => + { + if (e.Data != null) + { + outputBuilder.AppendLine(e.Data); + PluginLogger.LogInfo("PlaywrightPlugin", "BrowserInstall", e.Data); + } + }; + + process.ErrorDataReceived += (sender, e) => + { + if (e.Data != null) + { + errorBuilder.AppendLine(e.Data); + } + }; + + process.Start(); + process.BeginOutputReadLine(); + process.BeginErrorReadLine(); + + // Wait for the process to complete (with timeout) + await Task.Run(() => process.WaitForExit(300000)); // 5 minute timeout + + if (!process.HasExited) + { + process.Kill(); + throw new Exception("Browser installation timed out after 5 minutes"); + } + + if (process.ExitCode != 0) + { + string error = errorBuilder.ToString(); + if (!string.IsNullOrEmpty(error)) + { + throw new Exception($"Browser installation failed: {error}"); + } + } + + PluginLogger.NotifyTaskComplete("Browser download"); + PluginLogger.LogInfo("PlaywrightPlugin", "EnsureBrowsersInstalled", "Playwright browsers installed successfully"); + } + } + catch (Exception ex) + { + PluginLogger.NotifyTaskComplete("Browser download", false); + PluginLogger.LogError("PlaywrightPlugin", "EnsureBrowsersInstalled", $"Failed to install browsers: {ex.Message}"); + throw new Exception($"Failed to install Playwright browsers. Please run 'playwright install chromium' manually. Error: {ex.Message}", ex); + } + } + + /// + /// Sets the active session ID for browser operations. + /// + [Description("Sets the active session ID for browser operations")] + public string SetSessionId( + [Description("Session ID to use")] string sessionId = "default") + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "SetSessionId", $"SessionId: {sessionId}"); + + if (string.IsNullOrEmpty(sessionId)) + { + sessionId = "default"; + } + + _currentSessionId = sessionId; + return $"Session ID set to: {sessionId}"; + } + + /// + /// Enables or disables session persistence. + /// + [Description("Enables or disables session persistence")] + public string EnableSessionPersistence( + [Description("Enable session persistence (true/false)")] string enable = "true") + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "EnableSessionPersistence", $"Enable: {enable}"); + + bool enableSession = string.IsNullOrEmpty(enable) ? true : bool.Parse(enable); + _useSession = enableSession; + + return $"Session persistence set to: {enableSession}"; + } + + /// + /// Launches a browser with the specified options. + /// + [Description("Launches a new browser instance or uses existing browser if already running")] + public async Task LaunchBrowser( + [Description("Browser type (chromium, firefox, webkit)")] string browserType = "chromium", + [Description("Launch as headless browser (true/false)")] string headless = "true", + [Description("Force launch a new browser even if one exists (true/false)")] string forceNew = "false") + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "LaunchBrowser", $"Type: {browserType}, Headless: {headless}, ForceNew: {forceNew}"); + + // Check if we should force a new browser launch + bool shouldForceNew = !string.IsNullOrEmpty(forceNew) && bool.Parse(forceNew); + + // Check if a browser is already active + if (_browser != null && !shouldForceNew) + { + return $"Using existing {browserType} browser that is already running. To force a new browser launch, set forceNew to true."; + } + + PluginLogger.NotifyTaskStart("Browser launch", $"Starting {browserType} browser"); + + await InitializePlaywrightAsync(); + + try + { + // Close existing browser if one is open and we're forcing a new one + if (_browser != null) + { + if (_page != null) + { + try { await _page.CloseAsync(); } catch { /* ignore */ } + _page = null; + } + + if (_context != null) + { + try { await _context.CloseAsync(); } catch { /* ignore */ } + _context = null; + } + + await _browser.CloseAsync(); + await _browser.DisposeAsync(); + _browser = null; + } + + // Parse headless parameter + bool isHeadless = string.IsNullOrEmpty(headless) ? true : bool.Parse(headless); + + // Launch browser based on type + switch (browserType.ToLower()) + { + case "firefox": + _browser = await _playwright.Firefox.LaunchAsync(new BrowserTypeLaunchOptions { Headless = isHeadless }); + break; + case "webkit": + _browser = await _playwright.Webkit.LaunchAsync(new BrowserTypeLaunchOptions { Headless = isHeadless }); + break; + default: + _browser = await _playwright.Chromium.LaunchAsync(new BrowserTypeLaunchOptions { Headless = isHeadless }); + break; + } + + // Check if we should restore a session + if (_useSession) + { + var storageState = PlaywrightSessionManager.GetStorageState(_currentSessionId); + if (storageState != null) + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "RestoreSession", $"Restoring session: {_currentSessionId}"); + + var storageFile = await SaveStorageStateToTempFile(storageState); + + // Create context with storage state + _context = await _browser.NewContextAsync(new BrowserNewContextOptions + { + StorageStatePath = storageFile + }); + + // Clean up temp file + try { File.Delete(storageFile); } catch { /* Ignore errors */ } + } + else + { + // Create new context + _context = await _browser.NewContextAsync(); + } + } + else + { + // Create new context without session + _context = await _browser.NewContextAsync(); + } + + // Create a new page + _page = await _context.NewPageAsync(); + + PluginLogger.NotifyTaskComplete("Browser launch"); + return $"Successfully launched {browserType} browser"; + } + catch (Exception ex) + { + PluginLogger.NotifyTaskComplete("Browser launch", false); + return $"Error launching browser: {ex.Message}"; + } + } + + /// + /// Saves the current browser session for future use. + /// + [Description("Saves the current browser session for future use")] + public async Task SaveSession() + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "SaveSession", $"SessionId: {_currentSessionId}"); + + try + { + if (_context == null) + { + return "Error: Browser context not available. Launch browser first."; + } + + PluginLogger.NotifyTaskStart("Session save", $"Saving session: {_currentSessionId}"); + + // Get storage state from current context + var storageState = await _context.StorageStateAsync(); + + // Save session state + PlaywrightSessionManager.SaveStorageState(_currentSessionId, storageState); + + PluginLogger.NotifyTaskComplete("Session save"); + return $"Session saved successfully with ID: {_currentSessionId}. Cookies and login state preserved."; + } + catch (Exception ex) + { + PluginLogger.NotifyTaskComplete("Session save", false); + return $"Error saving session: {ex.Message}"; + } + } + + /// + /// Automatically saves the session if auto-save is enabled + /// + private async Task AutoSaveSessionIfEnabled() + { + if (_autoSaveSession && _useSession && _context != null) + { + try + { + var storageState = await _context.StorageStateAsync(); + PlaywrightSessionManager.SaveStorageState(_currentSessionId, storageState); + } + catch (Exception) + { + // Silently fail auto-save - don't interrupt user's flow + } + } + } + + /// + /// Enables or disables automatic session saving + /// + [Description("Enables or disables automatic session saving after actions")] + public string EnableAutoSave( + [Description("Enable auto-save (true/false)")] string enable = "true") + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "EnableAutoSave", $"Enable: {enable}"); + + bool enableAutoSave = string.IsNullOrEmpty(enable) ? true : bool.Parse(enable); + _autoSaveSession = enableAutoSave; + + return $"Auto-save session set to: {enableAutoSave}. " + + (enableAutoSave ? "Sessions will be automatically saved after navigation and actions." : "Sessions must be manually saved."); + } + + /// + /// Waits for user to complete manual actions (like logging in) and then saves the session + /// + [Description("Waits for user to complete manual actions (like login) then saves session")] + public async Task WaitForUserAndSaveSession( + [Description("Number of seconds to wait")] string seconds = "30") + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "WaitForUserAndSaveSession", $"Seconds: {seconds}"); + + try + { + if (_page == null) + { + return "Error: Browser not launched. Call LaunchBrowser first."; + } + + int waitTime = int.Parse(seconds); + + PluginLogger.NotifyTaskStart("Waiting for user", $"Waiting {waitTime} seconds for user to complete actions"); + + // Wait for the specified time + await Task.Delay(waitTime * 1000); + + // Save the session + var result = await SaveSession(); + + PluginLogger.NotifyTaskComplete("Waiting for user"); + return $"Waited {waitTime} seconds. {result}"; + } + catch (Exception ex) + { + PluginLogger.NotifyTaskComplete("Waiting for user", false); + return $"Error waiting for user: {ex.Message}"; + } + } + + /// + /// Waits for a specific element to appear (indicating user has logged in or completed action) + /// + [Description("Waits for a specific element to appear, then saves session. Useful for detecting successful login")] + public async Task WaitForElementAndSave( + [Description("CSS selector or text of element to wait for")] string selector, + [Description("Maximum seconds to wait")] string timeout = "30") + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "WaitForElementAndSave", $"Selector: {selector}"); + + try + { + if (_page == null) + { + return "Error: Browser not launched. Call LaunchBrowser first."; + } + + int timeoutMs = int.Parse(timeout) * 1000; + + PluginLogger.NotifyTaskStart("Waiting for element", $"Waiting for: {selector}"); + + // Wait for element + await _page.WaitForSelectorAsync(selector, new PageWaitForSelectorOptions + { + Timeout = timeoutMs, + State = WaitForSelectorState.Visible + }); + + // Save the session + var result = await SaveSession(); + + PluginLogger.NotifyTaskComplete("Waiting for element"); + return $"Element appeared: {selector}. {result}"; + } + catch (Exception ex) + { + PluginLogger.NotifyTaskComplete("Waiting for element", false); + return $"Error waiting for element: {ex.Message}"; + } + } + + /// + /// Navigates to the specified URL. + /// + [Description("Navigates to a specified URL")] + public async Task NavigateTo( + [Description("URL to navigate to")] string url, + [Description("Navigation wait strategy: load, domcontentloaded, networkidle")] + string waitUntil = "load") + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "NavigateTo", $"URL: {url}"); + + try + { + if (_page == null) + { + return "Error: Browser not launched. Call LaunchBrowser first."; + } + + PluginLogger.NotifyTaskStart("Navigation", $"Navigating to {url}"); + + // Ensure URL has protocol + if (!url.StartsWith("http://") && !url.StartsWith("https://")) + { + url = "https://" + url; + } + + // Determine wait strategy + WaitUntilState waitState = WaitUntilState.Load; + if (!string.IsNullOrEmpty(waitUntil)) + { + switch (waitUntil.ToLower()) + { + case "domcontentloaded": + waitState = WaitUntilState.DOMContentLoaded; + break; + case "networkidle": + waitState = WaitUntilState.NetworkIdle; + break; + case "commit": + waitState = WaitUntilState.Commit; + break; + default: + waitState = WaitUntilState.Load; + break; + } + } + + // Navigate to the URL using the chosen strategy with a timeout + var response = await _page.GotoAsync(url, new PageGotoOptions + { + WaitUntil = waitState, + Timeout = 30000 + }); + + // Wait a bit for any redirects or dynamic content to settle + await Task.Delay(1000); + + // Get the actual URL after any redirects + string actualUrl = _page.Url; + + // Auto-save session state after navigation + await AutoSaveSessionIfEnabled(); + + PluginLogger.NotifyTaskComplete("Navigation"); + + if (actualUrl != url && !actualUrl.StartsWith(url)) + { + return $"Navigated to {url} but was redirected to {actualUrl}. This may indicate you are already logged in. Call GetPageElements() to see current page."; + } + + return $"Successfully navigated to {actualUrl}. Call GetPageElements() to see available elements."; + } + catch (Exception ex) + { + PluginLogger.NotifyTaskComplete("Navigation", false); + return $"Error navigating to {url}: {ex.Message}"; + } + } + + /// + /// Waits for the page to finish loading after a navigation or action. + /// + [Description("Waits for the page to finish loading. Use after clicking links or submitting forms.")] + public async Task WaitForPageLoad( + [Description("Wait strategy: load, domcontentloaded, networkidle")] string waitUntil = "load", + [Description("Maximum time to wait in seconds")] string timeout = "30") + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "WaitForPageLoad", $"WaitUntil: {waitUntil}"); + + try + { + if (_page == null) + { + return "Error: Browser not launched. Call LaunchBrowser first."; + } + + PluginLogger.NotifyTaskStart("Page load", "Waiting for page to load"); + + int timeoutMs = int.Parse(timeout) * 1000; + + // Determine wait strategy + WaitUntilState waitState = WaitUntilState.Load; + switch (waitUntil?.ToLower()) + { + case "domcontentloaded": + waitState = WaitUntilState.DOMContentLoaded; + break; + case "networkidle": + waitState = WaitUntilState.NetworkIdle; + break; + case "commit": + waitState = WaitUntilState.Commit; + break; + } + + await _page.WaitForLoadStateAsync(LoadState.Load, new PageWaitForLoadStateOptions + { + Timeout = timeoutMs + }); + + // Also wait for network to be idle for better reliability + try + { + await _page.WaitForLoadStateAsync(LoadState.NetworkIdle, new PageWaitForLoadStateOptions + { + Timeout = 5000 + }); + } + catch (TimeoutException) + { + // Network idle timeout is acceptable, page may have long-polling + } + + // Auto-save session state after page load + await AutoSaveSessionIfEnabled(); + + string currentUrl = _page.Url; + PluginLogger.NotifyTaskComplete("Page load"); + return $"Page loaded successfully. Current URL: {currentUrl}"; + } + catch (Exception ex) + { + PluginLogger.NotifyTaskComplete("Page load", false); + return $"Error waiting for page load: {ex.Message}"; + } + } + + // Helper method to save storage state to a temporary file + private async Task SaveStorageStateToTempFile(string storageState) + { + string tempFile = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString() + ".json"); + File.WriteAllText(tempFile, storageState); + return tempFile; + } + + /// + /// Takes a screenshot of the current page. + /// + [Description("Takes a screenshot of the current page")] + public async Task TakeScreenshot( + [Description("Path where the screenshot should be saved")] string path = null, + [Description("CSS selector to screenshot specific element")] string selector = null) + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "TakeScreenshot"); + + try + { + if (_page == null) + { + return "Error: Browser not launched. Call LaunchBrowser first."; + } + + PluginLogger.NotifyTaskStart("Screenshot", "Capturing screenshot"); + + // Generate a default path if not provided + if (string.IsNullOrEmpty(path)) + { + string screenshotDir = Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), + "FlowVision", "Screenshots"); + + Directory.CreateDirectory(screenshotDir); + path = Path.Combine(screenshotDir, $"screenshot_{DateTime.Now:yyyyMMdd_HHmmss}.png"); + } + + // Take screenshot of specific element or full page + if (!string.IsNullOrEmpty(selector)) + { + var element = await _page.QuerySelectorAsync(selector); + if (element == null) + { + PluginLogger.NotifyTaskComplete("Screenshot", false); + return $"Error: Could not find element with selector: {selector}"; + } + + await element.ScreenshotAsync(new ElementHandleScreenshotOptions + { + Path = path + }); + } + else + { + await _page.ScreenshotAsync(new PageScreenshotOptions + { + Path = path, + FullPage = true + }); + } + + PluginLogger.NotifyTaskComplete("Screenshot"); + return $"Screenshot saved to: {path}"; + } + catch (Exception ex) + { + PluginLogger.NotifyTaskComplete("Screenshot", false); + return $"Error taking screenshot: {ex.Message}"; + } + } + + /// + /// Clicks on an element identified by a selector and automatically saves session state. + /// + [Description("Clicks on an element identified by CSS selector. Set waitForNavigation to true for links/submit buttons.")] + public async Task ClickElement( + [Description("CSS selector for the element to click")] string selector, + [Description("Wait for page navigation after click (true/false)")] string waitForNavigation = "false") + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "ClickElement", $"Selector: {selector}"); + + try + { + if (_page == null) + { + return "Error: Browser not launched. Call LaunchBrowser first."; + } + + if (string.IsNullOrEmpty(selector)) + { + return "Error: Selector cannot be empty"; + } + + bool shouldWaitForNav = !string.IsNullOrEmpty(waitForNavigation) && + bool.TryParse(waitForNavigation, out bool nav) && nav; + + PluginLogger.NotifyTaskStart("Click interaction", $"Clicking on element: {selector}"); + + // Wait for the element to be visible and enabled before clicking + var element = await _page.QuerySelectorAsync(selector); + if (element == null) + { + PluginLogger.NotifyTaskComplete("Click interaction", false); + return $"Error: Could not find element with selector: {selector}"; + } + + // Wait for element to be visible and enabled (interactable) + try + { + await _page.WaitForSelectorAsync(selector, new PageWaitForSelectorOptions + { + State = WaitForSelectorState.Visible, + Timeout = 5000 + }); + } + catch (TimeoutException) + { + PluginLogger.NotifyTaskComplete("Click interaction", false); + return $"Error: Element with selector '{selector}' did not become visible in time."; + } + + try + { + if (shouldWaitForNav) + { + // Click and wait for navigation + await Task.WhenAll( + _page.WaitForNavigationAsync(new PageWaitForNavigationOptions + { + WaitUntil = WaitUntilState.Load, + Timeout = 30000 + }), + _page.ClickAsync(selector, new PageClickOptions + { + Timeout = 5000 + }) + ); + + // Wait a bit more for the page to stabilize + try + { + await _page.WaitForLoadStateAsync(LoadState.NetworkIdle, new PageWaitForLoadStateOptions + { + Timeout = 5000 + }); + } + catch (TimeoutException) + { + // Network idle timeout is acceptable + } + } + else + { + await _page.ClickAsync(selector, new PageClickOptions + { + Timeout = 5000 + }); + } + } + catch (Exception clickEx) + { + PluginLogger.NotifyTaskComplete("Click interaction", false); + return $"Error: Failed to click element '{selector}': {clickEx.Message}"; + } + + // Auto-save session state after clicking + await AutoSaveSessionIfEnabled(); + + string result = $"Successfully clicked on element: {selector}."; + if (shouldWaitForNav) + { + result += $" Page navigated to: {_page.Url}"; + } + result += " Session automatically saved."; + + PluginLogger.NotifyTaskComplete("Click interaction"); + return result; + } + catch (Exception ex) + { + PluginLogger.NotifyTaskComplete("Click interaction", false); + return $"Error clicking element: {ex.Message}"; + } + } + + /// + /// Clicks on an element by its visible text. More reliable than CSS selectors for buttons and links. + /// + [Description("Clicks on a button or link by its visible text. Use this when CSS selectors don't work.")] + public async Task ClickByText( + [Description("The visible text of the button or link to click")] string text, + [Description("Wait for page navigation after click (true/false)")] string waitForNavigation = "false") + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "ClickByText", $"Text: {text}"); + + try + { + if (_page == null) + { + return "Error: Browser not launched. Call LaunchBrowser first."; + } + + if (string.IsNullOrEmpty(text)) + { + return "Error: Text cannot be empty"; + } + + bool shouldWaitForNav = !string.IsNullOrEmpty(waitForNavigation) && + bool.TryParse(waitForNavigation, out bool nav) && nav; + + PluginLogger.NotifyTaskStart("Click by text", $"Clicking element with text: {text}"); + + try + { + // Use Playwright's getByText or getByRole for more reliable clicking + var locator = _page.GetByRole(AriaRole.Button, new PageGetByRoleOptions { Name = text }); + + if (await locator.CountAsync() == 0) + { + // Try link + locator = _page.GetByRole(AriaRole.Link, new PageGetByRoleOptions { Name = text }); + } + + if (await locator.CountAsync() == 0) + { + // Fall back to text matching + locator = _page.GetByText(text, new PageGetByTextOptions { Exact = false }); + } + + if (await locator.CountAsync() == 0) + { + PluginLogger.NotifyTaskComplete("Click by text", false); + return $"Error: Could not find element with text: {text}"; + } + + if (shouldWaitForNav) + { + await locator.First.ClickAsync(); + await _page.WaitForLoadStateAsync(LoadState.Load); + } + else + { + await locator.First.ClickAsync(); + } + + await AutoSaveSessionIfEnabled(); + + PluginLogger.NotifyTaskComplete("Click by text"); + return $"Successfully clicked element with text: {text}"; + } + catch (Exception clickEx) + { + PluginLogger.NotifyTaskComplete("Click by text", false); + return $"Error clicking element with text '{text}': {clickEx.Message}"; + } + } + catch (Exception ex) + { + PluginLogger.NotifyTaskComplete("Click by text", false); + return $"Error: {ex.Message}"; + } + } + + /// + /// Types text into an input field identified by a selector and automatically saves session state. + /// + [Description("Types text into an input field identified by CSS selector")] + public async Task TypeText( + [Description("CSS selector for the input field")] string selector, + [Description("Text to type into the field")] string text) + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "TypeText", $"Selector: {selector}, Text length: {text?.Length ?? 0}"); + + try + { + if (_page == null) + { + return "Error: Browser not launched. Call LaunchBrowser first."; + } + + if (string.IsNullOrEmpty(selector)) + { + return "Error: Selector cannot be empty"; + } + + PluginLogger.NotifyTaskStart("Text input", $"Typing into field: {selector}"); + + // Try multiple common selectors for Bing if a generic search selector is provided + string[] selectorsToTry; + if (selector == "input[name='q']" && await _page.QuerySelectorAsync(selector) == null) + { + // For Bing specifically, try multiple selector options + string currentUrl = _page.Url; + if (currentUrl.Contains("bing.com")) + { + selectorsToTry = new[] { + selector, + "#sb_form_q", + "input[type='search']", + "[aria-label='Enter your search term']", + "#SearchBox" + }; + PluginLogger.LogInfo("PlaywrightPlugin", "TypeText", "Detected Bing, will try multiple selectors"); + } + else + { + selectorsToTry = new[] { selector }; + } + } + else + { + selectorsToTry = new[] { selector }; + } + + // Variables for retry logic + const int maxRetries = 3; + int currentRetry = 0; + Exception lastException = null; + + while (currentRetry < maxRetries) + { + try + { + foreach (var currentSelector in selectorsToTry) + { + try + { + // Log which selector we're trying + if (selectorsToTry.Length > 1) + { + PluginLogger.LogInfo("PlaywrightPlugin", "TypeText", $"Trying selector: {currentSelector}"); + } + + // Check if element exists + var element = await _page.QuerySelectorAsync(currentSelector); + if (element == null) + { + PluginLogger.LogInfo("PlaywrightPlugin", "TypeText", $"Element not found with selector: {currentSelector}"); + continue; // Try next selector + } + + // Check if element is visible + bool isVisible = await element.IsVisibleAsync(); + if (!isVisible) + { + PluginLogger.LogInfo("PlaywrightPlugin", "TypeText", $"Element not visible with selector: {currentSelector}"); + continue; // Try next selector + } + + // Check if element is enabled (not disabled) + bool isDisabled = await element.EvaluateAsync("el => el.disabled"); + if (isDisabled) + { + PluginLogger.LogInfo("PlaywrightPlugin", "TypeText", $"Element is disabled with selector: {currentSelector}"); + continue; // Try next selector + } + + // Try to scroll element into view for better interaction + await element.ScrollIntoViewIfNeededAsync(); + + // Get element dimensions to verify it has size + var boundingBox = await element.BoundingBoxAsync(); + if (boundingBox == null || boundingBox.Width <= 0 || boundingBox.Height <= 0) + { + PluginLogger.LogInfo("PlaywrightPlugin", "TypeText", $"Element has no dimensions with selector: {currentSelector}"); + continue; + } + + // First try to focus the element + await element.FocusAsync(); + + // Wait for the element to be actually focused + await Task.Delay(100); + + // Clear the field using keyboard shortcuts first + await _page.Keyboard.DownAsync("Control"); + await _page.Keyboard.PressAsync("a"); + await _page.Keyboard.UpAsync("Control"); + await _page.Keyboard.PressAsync("Delete"); + + // Then try direct filling (works better in most cases) + await element.FillAsync(""); + + // Type the text with a slight delay to simulate human typing + await element.TypeAsync(text, new ElementHandleTypeOptions { Delay = 5 }); + + // Check if the text was actually entered + string actualValue = await element.EvaluateAsync("el => el.value || el.textContent"); + if (string.IsNullOrEmpty(actualValue)) + { + PluginLogger.LogInfo("PlaywrightPlugin", "TypeText", $"Element accepted focus but no text was entered with selector: {currentSelector}"); + + // Try one last approach - direct JavaScript input + await _page.EvaluateAsync($"document.querySelector('{currentSelector}').value = '{text.Replace("'", "\\'")}'"); + + // Verify once more + actualValue = await element.EvaluateAsync("el => el.value || el.textContent"); + if (string.IsNullOrEmpty(actualValue)) + { + continue; // Try next selector + } + } + + // Auto-save session state after typing + await AutoSaveSessionIfEnabled(); + + PluginLogger.NotifyTaskComplete("Text input"); + return $"Successfully typed text into field with selector: {currentSelector}"; + } + catch (Exception selectorEx) + { + // Log exception for this selector but continue trying others + PluginLogger.LogError("PlaywrightPlugin", "TypeText", $"Error with selector '{currentSelector}': {selectorEx.Message}"); + lastException = selectorEx; + + // Continue to the next selector + } + } + + // If we've tried all selectors and none worked, throw an exception to trigger a retry + throw new Exception("No selectors were successful"); + } + catch (Exception retryEx) + { + currentRetry++; + lastException = retryEx; + + if (currentRetry < maxRetries) + { + // Log that we're retrying + PluginLogger.LogInfo("PlaywrightPlugin", "TypeText", $"Retry attempt {currentRetry}/{maxRetries}"); + + // Wait with increasing backoff before retrying + await Task.Delay(500 * currentRetry); + + // Take a screenshot to help diagnose the issue + string screenshotDir = Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), + "FlowVision", "Diagnostics"); + + Directory.CreateDirectory(screenshotDir); + string screenshotPath = Path.Combine(screenshotDir, $"typetext_retry_{currentRetry}_{DateTime.Now:yyyyMMdd_HHmmss}.png"); + + try + { + await _page.ScreenshotAsync(new PageScreenshotOptions { Path = screenshotPath }); + PluginLogger.LogInfo("PlaywrightPlugin", "TypeText", $"Diagnostic screenshot saved to {screenshotPath}"); + } + catch + { + // Ignore screenshot errors + } + } + } + } + + // If we get here, all retries have failed + string errorDetails = lastException != null ? $": {lastException.Message}" : ""; + PluginLogger.NotifyTaskComplete("Text input", false); + + // Try to get the HTML of the page for debugging + string pageHtml = ""; + try + { + pageHtml = await _page.ContentAsync(); + // Save the HTML for debugging + string htmlDir = Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), + "FlowVision", "Diagnostics"); + + Directory.CreateDirectory(htmlDir); + string htmlPath = Path.Combine(htmlDir, $"page_debug_{DateTime.Now:yyyyMMdd_HHmmss}.html"); + File.WriteAllText(htmlPath, pageHtml); + PluginLogger.LogInfo("PlaywrightPlugin", "TypeText", $"Page HTML saved to {htmlPath}"); + } + catch + { + // Ignore HTML capture errors + } + + return $"Error typing text after {maxRetries} attempts{errorDetails}. The element may be in an iframe or protected by the website."; + } + catch (Exception ex) + { + PluginLogger.NotifyTaskComplete("Text input", false); + return $"Error typing text: {ex.Message}"; + } + } + + /// + /// Lists all available saved sessions. + /// + [Description("Lists all available saved browser sessions")] + public string ListSessions() + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "ListSessions"); + + try + { + var sessions = PlaywrightSessionManager.GetAllSessions(); + + if (sessions.Count == 0) + { + return "No saved sessions found."; + } + + return $"Available sessions ({sessions.Count}):\n" + string.Join("\n", sessions); + } + catch (Exception ex) + { + return $"Error listing sessions: {ex.Message}"; + } + } + + /// + /// Deletes a saved session. + /// + [Description("Deletes a saved browser session")] + public string DeleteSession( + [Description("ID of the session to delete")] string sessionId = null) + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "DeleteSession", $"SessionId: {sessionId ?? _currentSessionId}"); + + try + { + string id = string.IsNullOrEmpty(sessionId) ? _currentSessionId : sessionId; + + if (PlaywrightSessionManager.DeleteSession(id)) + { + return $"Session '{id}' deleted successfully."; + } + else + { + return $"Session '{id}' not found."; + } + } + catch (Exception ex) + { + return $"Error deleting session: {ex.Message}"; + } + } + + /// + /// Closes the current browser instance and disposes all related resources. + /// + [Description("Closes the active browser and releases resources")] + public async Task CloseBrowser() + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "CloseBrowser"); + + try + { + if (_page != null) + { + try { await _page.CloseAsync(); } catch { /* ignore */ } + _page = null; + } + + if (_context != null) + { + try { await _context.CloseAsync(); } catch { /* ignore */ } + _context = null; + } + + if (_browser != null) + { + try { await _browser.CloseAsync(); } catch { /* ignore */ } + await _browser.DisposeAsync(); + _browser = null; + } + + _initialized = false; + + return "Browser closed successfully."; + } + catch (Exception ex) + { + return $"Error closing browser: {ex.Message}"; + } + } + /// Gets the full HTML content of the current page. + /// + [Description("Gets the current page HTML content")] + public async Task GetPageContent() + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "GetPageContent"); + + if (_page == null) + { + return "Error: Browser not launched. Call LaunchBrowser first."; + } + + return await _page.ContentAsync(); + } + + /// + /// Gets a summary of interactive elements on the page (buttons, links, inputs, forms). + /// This is more useful than raw HTML for understanding what actions are available. + /// + [Description("Gets a summary of clickable elements, links, buttons, and input fields on the current page. Use this to understand what you can interact with.")] + public async Task GetPageElements() + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "GetPageElements"); + + if (_page == null) + { + return "Error: Browser not launched. Call LaunchBrowser first."; + } + + try + { + // Wait for page to be stable before reading elements + try + { + await _page.WaitForLoadStateAsync(LoadState.DOMContentLoaded, new PageWaitForLoadStateOptions { Timeout = 5000 }); + } + catch (TimeoutException) { /* Continue anyway */ } + + // Small delay to let dynamic content settle + await Task.Delay(500); + + var result = new System.Text.StringBuilder(); + result.AppendLine($"Current URL: {_page.Url}"); + result.AppendLine($"Page Title: {await _page.TitleAsync()}"); + result.AppendLine(); + + // Get buttons + var buttons = await _page.QuerySelectorAllAsync("button, input[type='submit'], input[type='button'], [role='button']"); + if (buttons.Count > 0) + { + result.AppendLine($"=== BUTTONS ({buttons.Count}) ==="); + int count = 0; + foreach (var btn in buttons) + { + if (count >= 20) { result.AppendLine("... (more buttons)"); break; } + try + { + string text = await btn.TextContentAsync() ?? ""; + string id = await btn.GetAttributeAsync("id") ?? ""; + string className = await btn.GetAttributeAsync("class") ?? ""; + string ariaLabel = await btn.GetAttributeAsync("aria-label") ?? ""; + bool isVisible = await btn.IsVisibleAsync(); + if (isVisible && (!string.IsNullOrWhiteSpace(text) || !string.IsNullOrEmpty(id) || !string.IsNullOrEmpty(ariaLabel))) + { + string selector = !string.IsNullOrEmpty(id) ? $"#{id}" : + !string.IsNullOrEmpty(ariaLabel) ? $"[aria-label='{ariaLabel}']" : + $"button:has-text(\"{text.Trim().Substring(0, Math.Min(20, text.Trim().Length))}\")"; + result.AppendLine($" - \"{text.Trim()}\" | selector: {selector}"); + count++; + } + } + catch { } + } + result.AppendLine(); + } + + // Get links + var links = await _page.QuerySelectorAllAsync("a[href]"); + if (links.Count > 0) + { + result.AppendLine($"=== LINKS ({links.Count}) ==="); + int count = 0; + foreach (var link in links) + { + if (count >= 20) { result.AppendLine("... (more links)"); break; } + try + { + string text = await link.TextContentAsync() ?? ""; + string href = await link.GetAttributeAsync("href") ?? ""; + bool isVisible = await link.IsVisibleAsync(); + if (isVisible && !string.IsNullOrWhiteSpace(text) && text.Trim().Length < 100) + { + result.AppendLine($" - \"{text.Trim()}\" -> {href}"); + count++; + } + } + catch { } + } + result.AppendLine(); + } + + // Get input fields + var inputs = await _page.QuerySelectorAllAsync("input:not([type='hidden']), textarea, select"); + if (inputs.Count > 0) + { + result.AppendLine($"=== INPUT FIELDS ({inputs.Count}) ==="); + int count = 0; + foreach (var input in inputs) + { + if (count >= 20) { result.AppendLine("... (more inputs)"); break; } + try + { + string type = await input.GetAttributeAsync("type") ?? "text"; + string id = await input.GetAttributeAsync("id") ?? ""; + string name = await input.GetAttributeAsync("name") ?? ""; + string placeholder = await input.GetAttributeAsync("placeholder") ?? ""; + string ariaLabel = await input.GetAttributeAsync("aria-label") ?? ""; + bool isVisible = await input.IsVisibleAsync(); + if (isVisible) + { + string selector = !string.IsNullOrEmpty(id) ? $"#{id}" : + !string.IsNullOrEmpty(name) ? $"[name='{name}']" : + $"input[type='{type}']"; + string label = !string.IsNullOrEmpty(placeholder) ? placeholder : + !string.IsNullOrEmpty(ariaLabel) ? ariaLabel : + !string.IsNullOrEmpty(name) ? name : type; + result.AppendLine($" - [{type}] \"{label}\" | selector: {selector}"); + count++; + } + } + catch { } + } + result.AppendLine(); + } + + return result.ToString(); + } + catch (Exception ex) + { + return $"Error getting page elements: {ex.Message}"; + } + } + + /// + /// Retrieves text content from an element identified by a CSS selector. + /// + [Description("Gets the text content of an element by CSS selector")] + public async Task GetElementText( + [Description("CSS selector of the element")] string selector) + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "GetElementText", $"Selector: {selector}"); + + if (_page == null) + { + return "Error: Browser not launched. Call LaunchBrowser first."; + } + + if (string.IsNullOrEmpty(selector)) + { + return "Error: Selector cannot be empty"; + } + + try + { + var element = await _page.QuerySelectorAsync(selector); + if (element == null) + { + return $"Error: Could not find element with selector: {selector}"; + } + + string text = await element.TextContentAsync(); + return text ?? string.Empty; + } + catch (Exception ex) + { + return $"Error getting element text: {ex.Message}"; + } + } + + /// + /// Executes custom JavaScript in the current page and returns the result. + /// + [Description("Executes a JavaScript snippet in the current page")] + public async Task ExecuteScript( + [Description("JavaScript code to run")] string script) + { + PluginLogger.LogPluginUsage("PlaywrightPlugin", "ExecuteScript"); + + if (_page == null) + { + return "Error: Browser not launched. Call LaunchBrowser first."; + } + + if (string.IsNullOrWhiteSpace(script)) + { + return "Error: Script cannot be empty."; + } + + try + { + string result = await _page.EvaluateAsync(script); + return result ?? string.Empty; + } + catch (Exception ex) + { + return $"Error executing script: {ex.Message}"; + } + } + } +} diff --git a/FlowVision/lib/Plugins/PowershellPlugin.cs b/FlowVision/lib/Plugins/PowershellPlugin.cs index 46e90cb..dcdcec6 100644 --- a/FlowVision/lib/Plugins/PowershellPlugin.cs +++ b/FlowVision/lib/Plugins/PowershellPlugin.cs @@ -1,16 +1,15 @@ -using System; +using System; using System.ComponentModel; using System.Diagnostics; using System.Threading.Tasks; using System.Windows.Forms; using FlowVision.lib.Classes; -using Microsoft.SemanticKernel; namespace FlowVision.lib.Plugins { internal class PowerShellPlugin { - [KernelFunction, Description("Executes a PowerShell command and returns the output.")] + [Description("Executes a PowerShell command and returns the output.")] public async Task ExecuteCommand([Description("Powershell Command")] string command) { // Log the plugin usage diff --git a/FlowVision/lib/Plugins/RemoteControlPlugin.cs b/FlowVision/lib/Plugins/RemoteControlPlugin.cs new file mode 100644 index 0000000..5d01f9c --- /dev/null +++ b/FlowVision/lib/Plugins/RemoteControlPlugin.cs @@ -0,0 +1,119 @@ +using System; +using System.IO; +using System.Net; +using System.Text.Json; +using System.Threading.Tasks; +using FlowVision.lib.Classes; +using System.ComponentModel; + +namespace FlowVision.lib.Plugins +{ + /// + /// Simple HTTP server that forwards JSON commands to an assigned handler. + /// + public class RemoteControlPlugin : IDisposable + { + private static HttpListener _listener; + private static bool _running; + private static int _port; + private static Func> _commandHandler; + + /// + /// Assign the delegate used to execute incoming commands. + /// + public static void SetCommandHandler(Func> handler) + { + _commandHandler = handler; + } + + /// + /// Start listening on the configured port if not already running. + /// + public static void StartServer(int port) + { + if (_running) + { + return; + } + + _port = port; + _listener = new HttpListener(); + _listener.Prefixes.Add($"http://*:{port}/"); + _listener.Start(); + _running = true; + Task.Run(ListenLoop); + } + + /// + /// Returns the status of the remote control server. + /// + [Description("Get the status of the remote control server")] + public static string GetStatus() => _running ? $"Listening on {_port}" : "Stopped"; + + private static async Task ListenLoop() + { + while (_running) + { + try + { + var context = await _listener.GetContextAsync(); + if (context.Request.HttpMethod != "POST") + { + context.Response.StatusCode = 404; + context.Response.Close(); + continue; + } + + string body; + using (var reader = new StreamReader(context.Request.InputStream)) + { + body = await reader.ReadToEndAsync(); + } + + string command = null; + try + { + var doc = JsonDocument.Parse(body); + if (doc.RootElement.TryGetProperty("command", out var cmdEl)) + { + command = cmdEl.GetString(); + } + } + catch { } + + string result = string.Empty; + if (!string.IsNullOrEmpty(command) && _commandHandler != null) + { + PluginLogger.LogPluginUsage("RemoteControlPlugin", "Command", command); + result = await _commandHandler(command); + } + else + { + context.Response.StatusCode = 400; + result = "Invalid command"; + } + + var writer = new StreamWriter(context.Response.OutputStream); + await writer.WriteAsync(result ?? string.Empty); + context.Response.Close(); + } + catch (Exception ex) + { + PluginLogger.LogError("RemoteControl", "Listener", ex.Message); + } + } + } + + public static void StopServer() + { + _running = false; + try { _listener?.Stop(); } catch { } + _listener = null; + } + + public void Dispose() + { + StopServer(); + } + } +} diff --git a/FlowVision/lib/Plugins/ScreenCaptureOmniParserPlugin.cs b/FlowVision/lib/Plugins/ScreenCaptureOmniParserPlugin.cs deleted file mode 100644 index 806a7c6..0000000 --- a/FlowVision/lib/Plugins/ScreenCaptureOmniParserPlugin.cs +++ /dev/null @@ -1,142 +0,0 @@ -using System; -using System.Collections.Generic; -using System.ComponentModel; -using System.Drawing; -using System.Drawing.Imaging; -using System.Net.Http; -using System.Runtime.InteropServices; -using System.Threading; -using System.Threading.Tasks; -using System.Windows.Forms; -using FlowVision.lib.Classes; -using Microsoft.SemanticKernel; - -namespace FlowVision.lib.Plugins -{ - internal class ScreenCaptureOmniParserPlugin - { - private readonly string capPath = "c:\\temp\\cap.png"; - private readonly string prosPath = "c:\\temp\\pros.png"; - private readonly WindowSelectionPlugin _windowSelector; - - [DllImport("user32.dll", SetLastError = true)] - private static extern bool SetForegroundWindow(IntPtr hWnd); - - [DllImport("user32.dll", SetLastError = true)] - private static extern bool GetWindowRect(IntPtr hWnd, out WindowSelectionPlugin.RECT lpRect); - - public ScreenCaptureOmniParserPlugin() - { - _windowSelector = new WindowSelectionPlugin(); - } - - [KernelFunction, Description("Used to capture the Screen and return Parsed Content")] - public async Task> CaptureScreen(string handleString) - { - PluginLogger.LogPluginUsage("ScreenCaptureOmniParserPlugin", "CaptureScreen"); - - OmniparserResponse omniResult; - var capBase64 = CaptureWindow(handleString); - - using (HttpClient httpClient = new HttpClient()) - { - OmniParserClient omniClient = new OmniParserClient(httpClient); - // ProcessScreenshotAsync now returns a custom object. - omniResult = await omniClient.ProcessScreenshotAsync(capBase64); - //SaveSomImage(omniResult.SomImageBase64); - } - - return omniResult.ParsedContentList; - } - - //capture the whole screen - [KernelFunction, Description("Used to capture the whole screen")] - public async Task> CaptureWholeScreen() - { - PluginLogger.LogPluginUsage("ScreenCaptureOmniParserPlugin", "CaptureWholeScreen"); - var capBase64 = ""; - OmniparserResponse omniResult; - - //take capture all screens - using (Bitmap bmp = new Bitmap(SystemInformation.VirtualScreen.Width, SystemInformation.VirtualScreen.Height)) - { - using (Graphics gfxBmp = Graphics.FromImage(bmp)) - { - gfxBmp.CopyFromScreen(SystemInformation.VirtualScreen.X, SystemInformation.VirtualScreen.Y, 0, 0, SystemInformation.VirtualScreen.Size, CopyPixelOperation.SourceCopy); - } - using (var ms = new System.IO.MemoryStream()) - { - bmp.Save(ms, ImageFormat.Png); - capBase64 = Convert.ToBase64String(ms.ToArray()); - } - } - - using (HttpClient httpClient = new HttpClient()) - { - OmniParserClient omniClient = new OmniParserClient(httpClient); - // ProcessScreenshotAsync now returns a custom object. - omniResult = await omniClient.ProcessScreenshotAsync(capBase64); - //SaveSomImage(omniResult.SomImageBase64); - } - - return omniResult.ParsedContentList; - } - - /// - /// Save the image to the file system. - /// - /// - /* - private void SaveSomImage(string somImageBase64) - { - PluginLogger.LogPluginUsage("ScreenCaptureOmniParserPlugin", "SaveSomImage"); - byte[] imageBytes = Convert.FromBase64String(somImageBase64); - using (var ms = new System.IO.MemoryStream(imageBytes)) - { - Image image = Image.FromStream(ms); - image.Save(prosPath, ImageFormat.Png); - } - } - */ - - public string CaptureWindow(string handleString) - { - PluginLogger.LogPluginUsage("ScreenCaptureOmniParserPlugin", "CaptureWindow"); - IntPtr windowHandle = new IntPtr(Convert.ToInt32(handleString)); - - if (!_windowSelector.IsWindowHandleValid(windowHandle)) - { - throw new ArgumentException("Provided window handle is invalid."); - } - - if (!SetForegroundWindow(windowHandle)) - { - throw new InvalidOperationException("Failed to bring the window to the foreground."); - } - - Thread.Sleep(200); - - if (!GetWindowRect(windowHandle, out WindowSelectionPlugin.RECT rc)) - { - throw new InvalidOperationException("Failed to get the window rectangle."); - } - - int width = rc.Right - rc.Left; - int height = rc.Bottom - rc.Top; - - using (Bitmap bmp = new Bitmap(width, height, PixelFormat.Format32bppArgb)) - { - using (Graphics gfxBmp = Graphics.FromImage(bmp)) - { - gfxBmp.CopyFromScreen(rc.Left, rc.Top, 0, 0, new Size(width, height), CopyPixelOperation.SourceCopy); - } - - using (var ms = new System.IO.MemoryStream()) - { - bmp.Save(ms, ImageFormat.Png); - return Convert.ToBase64String(ms.ToArray()); - } - } - } - } -} diff --git a/FlowVision/lib/Plugins/ScreenCapturePlugin.cs b/FlowVision/lib/Plugins/ScreenCapturePlugin.cs index 0fa914d..25d170c 100644 --- a/FlowVision/lib/Plugins/ScreenCapturePlugin.cs +++ b/FlowVision/lib/Plugins/ScreenCapturePlugin.cs @@ -1,46 +1,30 @@ -using System.Collections.Generic; +using System.Collections.Generic; using System.ComponentModel; using System.Threading.Tasks; -using Microsoft.SemanticKernel; +using FlowVision.lib.Classes; namespace FlowVision.lib.Plugins { - /* - * This class is preserved for backward compatibility. - * It delegates to the new split classes: WindowSelectionPlugin and ScreenCaptureOmniParserPlugin. - * Consider updating references to use the new plugins directly. - */ + /// + /// Screen capture plugin - now delegates to WindowSelectionPlugin only. + /// For web automation, use PlaywrightPlugin instead. + /// internal class ScreenCapturePlugin { private readonly WindowSelectionPlugin _windowSelector; - private readonly ScreenCaptureOmniParserPlugin _screenCaptureOmniParser; public ScreenCapturePlugin() { _windowSelector = new WindowSelectionPlugin(); - _screenCaptureOmniParser = new ScreenCaptureOmniParserPlugin(); } - [KernelFunction, Description("Used to capture the Screen and return Parsed Content")] - public async Task> CaptureScreen(string handleString) - { - return await _screenCaptureOmniParser.CaptureScreen(handleString); - } - - //capture the whole screen - [KernelFunction, Description("Used to capture the whole screen")] - public async Task> CapturewholeScreen() - { - return await _screenCaptureOmniParser.CaptureWholeScreen(); - } - - [KernelFunction, Description("Used to set current handle as foreground")] + [Description("Used to set current handle as foreground")] public async Task ForegroundSelect(string handleString) { return await _windowSelector.ForegroundSelect(handleString); } - [KernelFunction, Description("Returns a list of available window handles, titles, and process names.")] + [Description("Returns a list of available window handles, titles, and process names.")] public string ListWindowHandles() { return _windowSelector.ListWindowHandles(); diff --git a/FlowVision/lib/Plugins/WindowSelectionPlugin.cs b/FlowVision/lib/Plugins/WindowSelectionPlugin.cs index 5a487a1..988bdfa 100644 --- a/FlowVision/lib/Plugins/WindowSelectionPlugin.cs +++ b/FlowVision/lib/Plugins/WindowSelectionPlugin.cs @@ -3,9 +3,9 @@ using System.ComponentModel; using System.Diagnostics; using System.Runtime.InteropServices; +using System.Text; using System.Threading.Tasks; using FlowVision.lib.Classes; -using Microsoft.SemanticKernel; namespace FlowVision.lib.Plugins { @@ -17,7 +17,53 @@ internal class WindowSelectionPlugin [DllImport("user32.dll", SetLastError = true)] private static extern bool GetWindowRect(IntPtr hWnd, out RECT lpRect); - [KernelFunction, Description("Used to set current handle as foreground")] + // --- New P/Invoke Definitions for Safe Enumeration --- + private delegate bool EnumWindowsProc(IntPtr hWnd, IntPtr lParam); + + [DllImport("user32.dll")] + [return: MarshalAs(UnmanagedType.Bool)] + private static extern bool EnumWindows(EnumWindowsProc lpEnumFunc, IntPtr lParam); + + [DllImport("user32.dll")] + [return: MarshalAs(UnmanagedType.Bool)] + private static extern bool IsWindowVisible(IntPtr hWnd); + + [DllImport("user32.dll", SetLastError = true, CharSet = CharSet.Auto)] + private static extern int GetWindowTextLength(IntPtr hWnd); + + [DllImport("user32.dll", SetLastError = true, CharSet = CharSet.Auto)] + private static extern IntPtr SendMessageTimeout( + IntPtr hWnd, + uint Msg, + IntPtr wParam, + IntPtr lParam, + uint fuFlags, + uint uTimeout, + out IntPtr lpdwResult); + + [DllImport("user32.dll", SetLastError = true)] + private static extern uint GetWindowThreadProcessId(IntPtr hWnd, out uint processId); + + private const uint WM_GETTEXT = 0x000D; + private const uint WM_GETTEXTLENGTH = 0x000E; + private const uint SMTO_ABORTIFHUNG = 0x0002; + + [DllImport("user32.dll", SetLastError = true)] + private static extern IntPtr GetForegroundWindow(); + + // ------------------------------------------------------ + + [Description("Gets the handle and title of the currently active foreground window.")] + public string GetForegroundWindowInfo() + { + IntPtr hWnd = GetForegroundWindow(); + if (hWnd == IntPtr.Zero) return "No active window"; + + string title = GetWindowTitleSafe(hWnd); + return $"Handle: {hWnd}, Title: {title}"; + } + + [Description("Used to set current handle as foreground")] public async Task ForegroundSelect(string handleString) { // Log the plugin usage @@ -31,31 +77,98 @@ public async Task ForegroundSelect(string handleString) return true; } - [KernelFunction, Description("Returns a list of available window handles, titles, and process names.")] + [Description("Returns a list of available window handles, titles, and process names.")] public string ListWindowHandles() { // Log the plugin usage PluginLogger.LogPluginUsage("WindowSelectionPlugin", "ListWindowHandles"); var windowList = new List(); - Process[] processes = Process.GetProcesses(); - foreach (Process p in processes) + + EnumWindows((hWnd, lParam) => { + // Filter invisible windows to reduce noise and hangs + if (!IsWindowVisible(hWnd)) + return true; // Continue enumeration + + // Safely get window title with timeout + string title = GetWindowTitleSafe(hWnd); + + // Skip untitled windows (often hidden helper windows) + if (string.IsNullOrWhiteSpace(title)) + return true; + + // Get process name + string processName = "Unknown"; try { - if (p.MainWindowHandle == IntPtr.Zero) - continue; - string item = $"Handle: {p.MainWindowHandle}, Title: {p.MainWindowTitle}, Process: {p.ProcessName}"; - windowList.Add(item); + GetWindowThreadProcessId(hWnd, out uint processId); + using (var p = Process.GetProcessById((int)processId)) + { + processName = p.ProcessName; + } } - catch - { - continue; - } - } + catch { /* Process might have exited or access denied */ } + + string item = $"Handle: {hWnd}, Title: {title}, Process: {processName}"; + windowList.Add(item); + + return true; // Continue enumeration + }, IntPtr.Zero); + return string.Join("\n", windowList); } + /// + /// Safely retrieves window title with a timeout to prevent hanging on unresponsive windows. + /// + private string GetWindowTitleSafe(IntPtr hWnd) + { + const int timeoutMs = 100; // Short timeout to ensure responsiveness + + // 1. Get text length with timeout + IntPtr result; + IntPtr ret = SendMessageTimeout( + hWnd, + WM_GETTEXTLENGTH, + IntPtr.Zero, + IntPtr.Zero, + SMTO_ABORTIFHUNG, + timeoutMs, + out result); + + if (ret == IntPtr.Zero) return string.Empty; // Failed or timed out + + int length = (int)result; + if (length == 0) return string.Empty; + + // 2. Get actual text with timeout + // Allocate unmanaged memory for the string buffer + // Length + 1 for null terminator, * 2 for Unicode characters + int bufferSize = (length + 1) * 2; + IntPtr buffer = Marshal.AllocHGlobal(bufferSize); + + try + { + ret = SendMessageTimeout( + hWnd, + WM_GETTEXT, + new IntPtr(length + 1), + buffer, + SMTO_ABORTIFHUNG, + timeoutMs, + out result); + + if (ret == IntPtr.Zero) return string.Empty; + + return Marshal.PtrToStringAuto(buffer); + } + finally + { + Marshal.FreeHGlobal(buffer); + } + } + /// /// Checks if the provided window handle is valid. /// diff --git a/FlowVision/lib/UI/ThemeColors.cs b/FlowVision/lib/UI/ThemeColors.cs new file mode 100644 index 0000000..b73ee94 --- /dev/null +++ b/FlowVision/lib/UI/ThemeColors.cs @@ -0,0 +1,41 @@ +using System; +using System.Drawing; + +namespace FlowVision.lib.UI +{ + /// + /// Defines color schemes for light and dark themes + /// + public static class ThemeColors + { + // Light theme colors + public static class Light + { + public static readonly Color Background = Color.White; + public static readonly Color Text = Color.Black; + public static readonly Color ButtonBackground = SystemColors.Control; + public static readonly Color ButtonText = Color.Black; + public static readonly Color ButtonBorder = SystemColors.ControlDark; + public static readonly Color TextBoxBackground = SystemColors.Window; + public static readonly Color TextBoxText = Color.Black; + public static readonly Color TextBoxBorder = SystemColors.ControlDark; + public static readonly Color TabBackground = Color.White; + public static readonly Color TabText = Color.Black; + } + + // Dark theme colors + public static class Dark + { + public static readonly Color Background = Color.FromArgb(45, 45, 48); + public static readonly Color Text = Color.White; + public static readonly Color ButtonBackground = Color.FromArgb(60, 60, 65); + public static readonly Color ButtonText = Color.White; + public static readonly Color ButtonBorder = Color.FromArgb(80, 80, 85); + public static readonly Color TextBoxBackground = Color.FromArgb(30, 30, 35); + public static readonly Color TextBoxText = Color.White; + public static readonly Color TextBoxBorder = Color.FromArgb(80, 80, 85); + public static readonly Color TabBackground = Color.FromArgb(45, 45, 48); + public static readonly Color TabText = Color.White; + } + } +} diff --git a/FlowVision/lib/UI/ThemeManager.cs b/FlowVision/lib/UI/ThemeManager.cs new file mode 100644 index 0000000..8cc907c --- /dev/null +++ b/FlowVision/lib/UI/ThemeManager.cs @@ -0,0 +1,195 @@ +using System; +using System.Drawing; +using System.IO; +using System.Text.Json; +using System.Windows.Forms; + +namespace FlowVision.lib.UI +{ + /// + /// Manages application themes and theme-related settings + /// + public class ThemeManager + { + private const string DefaultTheme = "Light"; + private string _currentTheme; + private readonly string _themePath; + + public ThemeManager() + { + // Create theme settings path + _themePath = Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), + "FlowVision", "Settings", "theme.json"); + + // Create directory if it doesn't exist + Directory.CreateDirectory(Path.GetDirectoryName(_themePath)); + + // Load theme from file or use default + _currentTheme = LoadThemeFromFile(); + } + + /// + /// Gets or sets the current theme + /// + public string CurrentTheme + { + get => _currentTheme; + set + { + _currentTheme = value; + SaveThemeToFile(_currentTheme); + } + } + + /// + /// Applies theme colors to a button control + /// + /// Button to apply theme to + public void ApplyThemeToButton(Button button) + { + if (_currentTheme == "Dark") + { + button.BackColor = ThemeColors.Dark.ButtonBackground; + button.ForeColor = ThemeColors.Dark.ButtonText; + button.FlatStyle = FlatStyle.Flat; + button.FlatAppearance.BorderColor = ThemeColors.Dark.ButtonBorder; + } + else + { + button.BackColor = ThemeColors.Light.ButtonBackground; + button.ForeColor = ThemeColors.Light.ButtonText; + button.FlatStyle = FlatStyle.Standard; + } + } + + /// + /// Applies theme colors to a textbox control + /// + /// TextBox to apply theme to + public void ApplyThemeToTextBox(TextBoxBase textBox) + { + if (_currentTheme == "Dark") + { + textBox.BackColor = ThemeColors.Dark.TextBoxBackground; + textBox.ForeColor = ThemeColors.Dark.TextBoxText; + textBox.BorderStyle = BorderStyle.FixedSingle; + } + else + { + textBox.BackColor = ThemeColors.Light.TextBoxBackground; + textBox.ForeColor = ThemeColors.Light.TextBoxText; + textBox.BorderStyle = BorderStyle.Fixed3D; + } + } + + /// + /// Applies theme colors to all controls in a container + /// + /// Container with controls to theme + public void ApplyThemeToControls(Control container) + { + // Set container colors + if (_currentTheme == "Dark") + { + container.BackColor = ThemeColors.Dark.Background; + container.ForeColor = ThemeColors.Dark.Text; + } + else + { + container.BackColor = ThemeColors.Light.Background; + container.ForeColor = ThemeColors.Light.Text; + } + + // Process all child controls recursively + foreach (Control control in container.Controls) + { + if (control is Button button) + { + ApplyThemeToButton(button); + } + else if (control is TextBoxBase textBox) + { + ApplyThemeToTextBox(textBox); + } + else if (control is TabPage tabPage) + { + // Apply theme to tab page + if (_currentTheme == "Dark") + { + tabPage.BackColor = ThemeColors.Dark.TabBackground; + tabPage.ForeColor = ThemeColors.Dark.TabText; + } + else + { + tabPage.BackColor = ThemeColors.Light.TabBackground; + tabPage.ForeColor = ThemeColors.Light.TabText; + } + + // Process tab page controls + ApplyThemeToControls(tabPage); + } + else if (control is GroupBox) + { + // Apply theme to group box + if (_currentTheme == "Dark") + { + control.BackColor = ThemeColors.Dark.Background; + control.ForeColor = ThemeColors.Dark.Text; + } + else + { + control.BackColor = ThemeColors.Light.Background; + control.ForeColor = ThemeColors.Light.Text; + } + + // Process group box controls + ApplyThemeToControls(control); + } + else if (control.Controls.Count > 0) + { + // Recursively apply theme to container controls + ApplyThemeToControls(control); + } + } + } + + private string LoadThemeFromFile() + { + try + { + if (File.Exists(_themePath)) + { + string jsonContent = File.ReadAllText(_themePath); + var themeSetting = JsonSerializer.Deserialize(jsonContent); + return themeSetting?.Name ?? DefaultTheme; + } + } + catch (Exception ex) + { + Console.WriteLine($"Error loading theme: {ex.Message}"); + } + + return DefaultTheme; + } + + private void SaveThemeToFile(string themeName) + { + try + { + var themeSetting = new ThemeSetting { Name = themeName }; + string jsonContent = JsonSerializer.Serialize(themeSetting, new JsonSerializerOptions { WriteIndented = true }); + File.WriteAllText(_themePath, jsonContent); + } + catch (Exception ex) + { + Console.WriteLine($"Error saving theme: {ex.Message}"); + } + } + + private class ThemeSetting + { + public string Name { get; set; } + } + } +} diff --git a/FlowVision/packages.config b/FlowVision/packages.config index 1b8fa7d..b496364 100644 --- a/FlowVision/packages.config +++ b/FlowVision/packages.config @@ -12,12 +12,16 @@ + + + + @@ -28,6 +32,7 @@ + @@ -35,9 +40,12 @@ + + + \ No newline at end of file diff --git a/FlowVision/recursive-control-icon.ico b/FlowVision/recursive-control-icon.ico new file mode 100644 index 0000000..10054d0 Binary files /dev/null and b/FlowVision/recursive-control-icon.ico differ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5a4f523 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Justin Trantham + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..75a286f --- /dev/null +++ b/README.md @@ -0,0 +1,140 @@ + + + + +📎 AI Control for Windows Computers 📎 + +[](https://discord.gg/mQWsWeHsVU) + +Recursive Control is an innovative project designed to enable artificial intelligence (AI) to interact seamlessly with your computer, automating tasks, performing complex workflows, and enhancing productivity. + +## Project Goal + +Our mission is to create an AI-driven interface that can autonomously control your computer, intelligently perform tasks, open applications, execute commands, and streamline workflows, effectively turning natural language into actionable operations. + +## Features + +- **AI-Powered Interaction**: Utilize AI models (such as GPT-based models) to interpret user input and intelligently execute actions. +- **Automated Workflow Execution**: Automate repetitive or complex sequences of computer actions. +- **Natural Language Commands**: Simply describe tasks in plain language, and let the AI handle execution. + +## Getting Started + +### Prerequisites + +- .NET 4.8 or later +- Windows Operating System +- Azure OpenAI API Key (More models will be supported in the future) + +### Local Setup + +Download the latest release from the [Releases](https://github.com/flowdevs-io/Recursive-Control/releases) page and follow three easy steps. + +1. Run recursivecontrol.exe +2. Setup your LLM + +3. Input your commands directly into the UI, and watch as AI automate your tasks. + + +### Development + +1. Clone this repository: + ```bash + git clone https://github.com/flowdevs-io/Recursive-Control.git + ``` + +2. Navigate to the cloned directory: + ```bash + cd Recursive-Control + ``` + +3. Restore dependencies and build the project: + ```bash + dotnet restore + dotnet build + ``` + +## Plugin System + +Recursive Control supports a modular plugin system, allowing you to extend its capabilities. Plugins can automate keyboard, mouse, window management, screen capture, command line, and more. You can find plugin implementations in the `FlowVision/lib/Plugins/` directory. To add your own plugin, implement the required interface and register it in the application. + +### Built-in Plugins +- **CMDPlugin**: Execute Windows command line instructions. +- **PowershellPlugin**: Run PowerShell scripts and commands. +- **KeyboardPlugin**: Automate keyboard input. +- **MousePlugin**: Automate mouse actions. +- **ScreenCapturePlugin**: Capture screenshots. +- **WindowSelectionPlugin**: Select and interact with application windows. + +## Folder Structure + +``` +FlowVision.sln # Solution file +FlowVision/ # Main application source + lib/ # Core libraries and plugins + Classes/ # Helper and service classes + Plugins/ # Built-in plugins + UI/ # UI theming + Models/ # Data models + Properties/ # .NET project properties +content/ # Images and assets +``` + +## Example Use Cases +- Control applications via natural language (e.g., "Open Excel and create a new spreadsheet") +- Capture and process screenshots for documentation +- Batch rename files or organize folders + +## Roadmap + +### Near-Term Goals +- [ ] **Content warning logging**: Implement logging for content warnings to improve safety and transparency. +- [ ] **Model Support**: Add support for Gemini, OLLAMA, OpenAI, Bedrock, Phi4, and Phi Silica models. +- [ ] **Improved Speech Recognition**: Move away from System.Speech.Recognition (which is slow and inaccurate for voice commands) and adopt real-time audio models from OpenAI or similar providers. + +### Farther Out +- [ ] **Local Bbox Search**: Reduce token usage by integrating Bbox search locally (using OLLAMA, Phi Silica, or other novel SLMs). +- [ ] **Managed LLM Integration**: Develop Recursive Control managed LLM for non-user configurable integration, enabling billing for usage or subscription plans. +- [ ] **YOLO Bbox Parser Integration**: Integrate Yolo Bbox parser using ONNX for advanced vision capabilities. + +### End Goal +Recursive Control running on every Windows computer, leveraging local SLMs, Recursive Control hosted LLMs, and embedded YOLO vision models. The ultimate aim is to make the integration so seamless that new PC users will no longer need a keyboard or mouse—just interact with the latest LLM, and it will turn words into commands. So easy our elders will even use it. + +## Troubleshooting +- Ensure you have .NET 4.8+ installed +- Check your API key and network connection for LLM access +- For plugin errors, review the application logs in %appdata%\FlowVision\plugin_usage.log + +## Contributing + +We welcome contributions! Please feel free to submit issues, suggestions, or pull requests. Your collaboration is essential for making Recursive Control powerful and versatile. + +## Community & Support +- [GitHub Issues](https://github.com/flowdevs-io/Recursive-Control/issues) for bug reports and feature requests +- [Discussions](https://github.com/flowdevs-io/Recursive-Control/discussions) for Q&A and ideas +- [LinkedIn](https://www.linkedin.com/company/flowdevs) for updates and networking + +## License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. + +## Contact + +For any questions, feedback, or collaboration inquiries, please connect with us through our GitHub repository, or via LinkedIn. + +## Citation + +If you use Browser Use in your research or project, please cite: + +```bibtex +@software{recursive-control2025, + author = {Trantham, Justin}, + title = {Recursive Control: AI Control for Windows Computers }, + year = {2025}, + publisher = {GitHub}, + url = {https://github.com/flowdevs-io/Recursive-Contro} +} +``` + +Made and owned by Engineers + diff --git a/content/recursive-control-banner-dark-1280x640.jpeg b/content/recursive-control-banner-dark-1280x640.jpeg new file mode 100644 index 0000000..540b256 Binary files /dev/null and b/content/recursive-control-banner-dark-1280x640.jpeg differ diff --git a/docs/API-Reference.md b/docs/API-Reference.md new file mode 100644 index 0000000..541d129 --- /dev/null +++ b/docs/API-Reference.md @@ -0,0 +1,397 @@ +--- +layout: default +title: API Reference +--- + +# API Reference + +Developer documentation for extending and integrating with Recursive Control. + +## Plugin Development + +### Creating a Custom Plugin + +Plugins extend Recursive Control's capabilities. Here's how to create one: + +```csharp +using Microsoft.SemanticKernel; +using System.ComponentModel; + +public class MyCustomPlugin +{ + [KernelFunction] + [Description("Does something useful")] + public string MyFunction( + [Description("Input parameter")] string input) + { + // Your implementation here + return $"Processed: {input}"; + } +} +``` + +### Plugin Interface Requirements + +All plugins must: +1. Use `[KernelFunction]` attribute for exposed methods +2. Include `[Description]` for functions and parameters +3. Return serializable types (string, int, bool, etc.) +4. Handle exceptions gracefully + +### Registering Your Plugin + +```csharp +// In your initialization code +kernel.ImportPluginFromType(); +``` + +## Built-in Plugins API + +### CMDPlugin + +Execute command line instructions. + +```csharp +[KernelFunction] +[Description("Execute a Windows command")] +string ExecuteCommand( + [Description("Command to execute")] string command, + [Description("Working directory")] string workingDirectory = null) +``` + +**Example Usage**: "Execute dir command in C:\\Users" + +### PowerShellPlugin + +Run PowerShell scripts and commands. + +```csharp +[KernelFunction] +[Description("Execute PowerShell command")] +string ExecutePowerShell( + [Description("PowerShell script")] string script) +``` + +**Example Usage**: "Run PowerShell to get running processes" + +### KeyboardPlugin + +Automate keyboard input. + +```csharp +[KernelFunction] +[Description("Type text using keyboard")] +void TypeText( + [Description("Text to type")] string text) + +[KernelFunction] +[Description("Press a key combination")] +void PressKeys( + [Description("Keys to press")] string keys) +``` + +**Example Usage**: "Type Hello World" or "Press Ctrl+C" + +### MousePlugin + +Automate mouse actions. + +```csharp +[KernelFunction] +[Description("Click at coordinates")] +void Click( + [Description("X coordinate")] int x, + [Description("Y coordinate")] int y, + [Description("Button (left/right/middle)")] string button = "left") + +[KernelFunction] +[Description("Move mouse to position")] +void MoveTo( + [Description("X coordinate")] int x, + [Description("Y coordinate")] int y) +``` + +**Example Usage**: "Click at position 500, 300" + +### ScreenCapturePlugin + +Capture and analyze screenshots. + +```csharp +[KernelFunction] +[Description("Capture screenshot")] +string CaptureScreen( + [Description("Capture full screen or window")] string mode = "fullscreen") + +[KernelFunction] +[Description("Get screen dimensions")] +string GetScreenSize() +``` + +**Example Usage**: "Take a screenshot of the current window" + +### WindowSelectionPlugin + +Manage application windows. + +```csharp +[KernelFunction] +[Description("List all open windows")] +string ListWindows() + +[KernelFunction] +[Description("Bring window to front")] +void FocusWindow( + [Description("Window title or handle")] string identifier) + +[KernelFunction] +[Description("Close a window")] +void CloseWindow( + [Description("Window identifier")] string identifier) +``` + +**Example Usage**: "List all open windows" or "Focus Chrome window" + +### PlaywrightPlugin + +Automate web browsers. + +```csharp +[KernelFunction] +[Description("Launch browser")] +Task LaunchBrowser( + [Description("Browser type")] string browser = "chromium") + +[KernelFunction] +[Description("Navigate to URL")] +Task NavigateTo( + [Description("URL to navigate")] string url) + +[KernelFunction] +[Description("Execute JavaScript")] +Task ExecuteScript( + [Description("JavaScript code")] string script) + +[KernelFunction] +[Description("Close browser")] +Task CloseBrowser() +``` + +**Example Usage**: "Open browser and go to github.com" + +### RemoteControlPlugin + +HTTP API for remote command execution. + +```csharp +[KernelFunction] +[Description("Start HTTP server")] +void StartServer( + [Description("Port number")] int port = 8080) + +[KernelFunction] +[Description("Stop HTTP server")] +void StopServer() +``` + +**HTTP API Endpoint**: +```bash +POST http://localhost:8080/command +Content-Type: application/json + +{ + "command": "Your natural language command here" +} +``` + +## Configuration API + +### ToolConfig + +Main configuration for plugins and features. + +```csharp +public class ToolConfig +{ + // Plugin toggles + public bool EnableKeyboard { get; set; } + public bool EnableMouse { get; set; } + public bool EnableScreenCapture { get; set; } + public bool EnableCMD { get; set; } + public bool EnablePowerShell { get; set; } + public bool EnablePlaywright { get; set; } + public bool EnableRemoteControl { get; set; } + + // System prompts + public string CoordinatorPrompt { get; set; } + public string PlannerPrompt { get; set; } + public string ExecutorPrompt { get; set; } + + // Other settings + public int RemoteControlPort { get; set; } + public bool VerboseLogging { get; set; } +} +``` + +**Config Location**: `%APPDATA%\FlowVision\toolconfig.json` + +### APIConfig + +AI provider configuration. + +```csharp +public class APIConfig +{ + public string Provider { get; set; } // "OpenAI", "Azure", "Anthropic", etc. + public string ApiKey { get; set; } + public string Endpoint { get; set; } + public string ModelName { get; set; } + public string DeploymentName { get; set; } + public int MaxTokens { get; set; } + public double Temperature { get; set; } +} +``` + +**Config Location**: `%APPDATA%\FlowVision\apiconfig.json` + +## Multi-Agent Architecture + +### Agent Roles + +```csharp +// Coordinator: Routes requests to appropriate agent +var coordinatorAgent = new Agent +{ + Name = "Coordinator", + SystemPrompt = toolConfig.CoordinatorPrompt +}; + +// Planner: Creates execution plans +var plannerAgent = new Agent +{ + Name = "Planner", + SystemPrompt = toolConfig.PlannerPrompt +}; + +// Executor: Executes actions using plugins +var executorAgent = new Agent +{ + Name = "Executor", + SystemPrompt = toolConfig.ExecutorPrompt, + Plugins = kernel.Plugins +}; +``` + +### Workflow + +1. User input → Coordinator +2. Coordinator → Planner (if planning needed) +3. Planner → Executor (with step-by-step plan) +4. Executor → Plugins (to perform actions) +5. Results → User + +## Extension Points + +### Custom AI Providers + +Implement custom AI provider: + +```csharp +public interface IAIProvider +{ + Task GenerateResponse(string prompt); + Task GenerateWithFunctions(string prompt, IEnumerable functions); +} +``` + +### Custom Logging + +Implement custom logger: + +```csharp +public interface IPluginLogger +{ + void LogUsage(string plugin, string function, Dictionary parameters); + void LogError(string plugin, Exception ex); +} +``` + +### Custom UI Themes + +Create custom theme: + +```csharp +public class CustomTheme : ITheme +{ + public Color BackgroundColor { get; set; } + public Color ForegroundColor { get; set; } + public Color AccentColor { get; set; } + public Font DefaultFont { get; set; } +} +``` + +## Integration Examples + +### HTTP API Integration + +```python +import requests + +# Send command via HTTP +response = requests.post( + 'http://localhost:8080/command', + json={'command': 'Open Notepad and type Hello World'} +) + +print(response.json()) +``` + +### Programmatic Control + +```csharp +// Initialize Recursive Control programmatically +var config = new APIConfig { /* ... */ }; +var executor = new MultiAgentActioner(config); + +// Execute command +var result = await executor.ExecuteAsync("Take a screenshot"); +Console.WriteLine(result); +``` + +## Best Practices + +### Plugin Development +1. Use descriptive function and parameter names +2. Provide detailed descriptions for AI understanding +3. Handle errors gracefully and return meaningful messages +4. Keep functions focused on single responsibilities +5. Test with various AI models + +### Performance +1. Cache expensive operations +2. Use async/await for I/O operations +3. Implement timeout mechanisms +4. Release resources properly + +### Security +1. Validate all input parameters +2. Sanitize file paths and commands +3. Implement permission checks +4. Log security-relevant actions +5. Don't expose sensitive data in responses + +## Further Reading + +- [Multi-Agent Architecture](Multi-Agent-Architecture.html) - Deep dive into agent system +- [System Prompts Reference](System-Prompts-Reference.html) - Customizing agent behavior +- [GitHub Repository](https://github.com/flowdevs-io/Recursive-Control) - Source code + +## Community Resources + +- [Discord Developer Channel](https://discord.gg/mQWsWeHsVU) - Ask questions +- [GitHub Discussions](https://github.com/flowdevs-io/Recursive-Control/discussions) - Share ideas +- [Example Plugins](https://github.com/flowdevs-io/Recursive-Control/tree/master/FlowVision/lib/Plugins) - Reference implementations + +--- + +Have questions? Join our [Discord](https://discord.gg/mQWsWeHsVU) or open a [GitHub Discussion](https://github.com/flowdevs-io/Recursive-Control/discussions)! diff --git a/docs/Blog-Post-v2.0.md b/docs/Blog-Post-v2.0.md new file mode 100644 index 0000000..a9706eb --- /dev/null +++ b/docs/Blog-Post-v2.0.md @@ -0,0 +1,441 @@ +# From Good to Great: How We Transformed Recursive Control into a Best-in-Class AI Computer Control Platform + +*October 2, 2025* + +## TL;DR + +We just shipped a massive upgrade to Recursive Control that transforms it from a promising computer control tool into a production-ready AI agent platform. **Six critical fixes**, **800+ lines of new AI prompts**, and a **complete philosophical realignment** with how AI should actually control computers. + +**The result?** Task success rates jumped from ~50% to ~90%, and the system now handles complex 25-step workflows that would have failed before. + +--- + +## The Problem: AI That Couldn't Really Control Your Computer + +When we built Recursive Control, we had a vision: an AI that could **truly** control your Windows computer. Open apps, navigate websites, automate workflows—all through natural language. + +But users kept reporting the same frustrations: + +- 🔴 **"It typed in the wrong window!"** - Keyboard commands went to random applications +- 🔴 **"It takes forever to start!"** - 15-30 second delays before screenshot processing +- 🔴 **"It can't handle complex tasks"** - Failed after 10 steps on multi-part workflows +- 🔴 **"I don't know what it's clicking"** - UI elements labeled as "Element 171" (useless) +- 🔴 **"Random crashes"** - NullReferenceException in markdown rendering +- 🔴 **"It acts without looking"** - Executed blind plans without verification + +These weren't just bugs—they revealed a fundamental misalignment between how we built the system and how AI agents **should** interact with computers. + +--- + +## The Breakthrough: Learning from an AI Coding Agent + +Here's where it gets interesting. We brought in an AI coding agent (yes, AI helping AI) to audit the system. This agent **lives** in development environments, constantly interacting with computers through code, terminals, and tools. + +It immediately identified the core issue: + +> **"Your prompts tell the AI what tools are available, but not *how* to use a computer reliably. You need the observe → act → verify cycle, not blind execution."** + +That insight changed everything. + +--- + +## The Fix: Six Critical Improvements + +### 1. Window-Targeted Keyboard Control 🎯 + +**The Problem**: `SendKey("Ctrl+T")` went to whatever window had focus. If you had Terminal open instead of Chrome? You just sent a command to the wrong app. + +**The Solution**: We added window-specific keyboard methods: + +```csharp +// OLD WAY (50% success rate) +SendKey("^t") // Might go anywhere! + +// NEW WAY (95% success rate) +string chromeHandle = "12345678"; // Get from ListWindowHandles() +SendKeyToWindow(chromeHandle, "^t") // Goes to Chrome specifically +``` + +Now the AI can say "Send Ctrl+T to **this specific Chrome window**" instead of hoping for the best. + +**Impact**: Keyboard operation success rate jumped from 50% to 95%. + +--- + +### 2. Instant Screenshot Processing ⚡ + +**The Problem**: The first screenshot took 15-30 seconds because the YOLO object detection model loaded on-demand. Users thought the app had frozen. + +**The Solution**: We initialize the ONNX model automatically at startup: + +```csharp +public ScreenCaptureOmniParserPlugin() +{ + _windowSelector = new WindowSelectionPlugin(); + + // Initialize ONNX engine at startup - YOLO model ready! + if (_useOnnxMode && _onnxEngine == null) + { + ConfigureMode(true); + } +} +``` + +**Impact**: Screenshots now process in under 1 second, every time. No more "is it frozen?" moments. + +--- + +### 3. Meaningful UI Element Labels 📍 + +**The Problem**: Screenshots returned elements labeled "Element 171", "Element 172"—completely useless for decision making. + +**The Solution**: Elements now include position and size information: + +``` +BEFORE: "Element 171" +AFTER: "UI Element #1 at (150,200) [size: 120x40]" +``` + +Now the AI can say "Click the large button in the top-right" or "Find elements around position (300, 250)" with actual spatial awareness. + +**Impact**: The AI can now identify and target UI elements based on their location and size, not just blind iteration. + +--- + +### 4. System Prompts Completely Rewritten 📝 + +**The Problem**: The AI had access to tools but no guidance on **computer control best practices**. It would plan 10 steps blindly and hope everything worked. + +**The Solution**: We wrote **800+ lines of new prompts** based on how an AI coding agent actually interacts with computers: + +**Actioner Prompt (400+ lines)**: +``` +You are a Windows computer control agent. + +## Operating Principles + +1. ALWAYS Start with Observation + - CaptureWholeScreen() before acting + - ListWindowHandles() to see what's running + +2. USE Window Handles for Everything + - Never SendKey() without window handle + - Always target specific windows + +3. Verify Important Actions + - Take screenshot after critical steps + - Check that action actually succeeded + +4. Work Iteratively + - Do → Verify → Adjust + - Not: Plan 10 steps → Execute all → Hope +``` + +**Planner Prompt (250+ lines)**: +``` +## Planning Principles + +1. Always Start with Observation + - First step: CaptureWholeScreen() or ListWindowHandles() + +2. One Action Per Step + - Each step uses exactly ONE tool call + +3. Build on Results + - Wait for each step's result before planning next + +4. Verify Important Actions + - Take screenshots after critical operations +``` + +**Impact**: The AI now follows proper computer control workflows instead of guessing. + +--- + +### 5. 25-Step Workflows (Up from 10) 🔢 + +**The Problem**: Complex tasks failed because the system stopped at 10 steps. Real workflows need more. + +**The Solution**: Increased iteration limit to 25 with better progress tracking: + +```csharp +int maxIterations = 25; // Was 10 +PluginLogger.LogPluginUsage($"⚙️ Step {currentIteration}/{maxIterations}"); +``` + +**Impact**: Tasks like "Search YouTube for Python tutorials and report the top 3 results" (15 steps) now complete successfully. + +--- + +### 6. No More Random Crashes 🛡️ + +**The Problem**: `NullReferenceException` when formatting markdown because `SelectionFont` could be null. + +**The Solution**: Null-safe font handling with sensible defaults: + +```csharp +// BEFORE (crash if null) +richTextBox.SelectionFont = new Font("Consolas", richTextBox.SelectionFont.Size); + +// AFTER (safe with default) +float fontSize = richTextBox.SelectionFont?.Size ?? 10F; +richTextBox.SelectionFont = new Font("Consolas", fontSize); +``` + +**Impact**: No more crashes when rendering AI responses with code blocks. + +--- + +## The Results: From 50% to 90% Success + +The numbers speak for themselves: + +| Task Type | Before | After | Improvement | +|-----------|--------|-------|-------------| +| **Browser Navigation** | 70% | 95% | +25% | +| **Window Management** | 60% | 90% | +30% | +| **Keyboard Input** | 50% | 95% | +45% | +| **Multi-Step Tasks** | 40% | 85% | +45% | +| **Error Recovery** | 30% | 75% | +45% | + +**Overall task success: ~50% → ~90%** + +--- + +## Real-World Example: Before vs After + +Let's look at a simple task: **"Open YouTube in Chrome"** + +### Before (50% Success Rate): +``` +1. SendKey("^t") ❌ Might go to Terminal +2. Type "youtube.com" ❌ Typed in wrong window +3. Press Enter ❌ Random results +``` + +### After (95% Success Rate): +``` +1. CaptureWholeScreen() - See current state +2. ListWindowHandles() - Find Chrome (handle: 12345678) +3. ForegroundSelect("12345678") - Bring Chrome forward +4. SendKeyToWindow("12345678", "^t") - New tab in Chrome +5. SendKeyToWindow("12345678", "youtube") - Type in Chrome +6. EnterKeyToWindow("12345678") - Navigate in Chrome +7. Wait 2000ms - Allow page load +8. CaptureScreen("12345678") - Verify success ✅ +``` + +Notice the difference: +- ✅ **Window-specific targeting** (not global commands) +- ✅ **Visual verification** (screenshots to confirm state) +- ✅ **Iterative execution** (check each step) +- ✅ **Explicit waits** (allow time for operations) + +This is what **reliable** computer control looks like. + +--- + +## The Philosophy: Observe → Act → Verify + +The biggest change isn't in the code—it's in the **philosophy**. + +We realized that controlling a computer is fundamentally different from chat. You can't just: +1. Plan 10 steps +2. Execute them all +3. Hope it worked + +Instead, you need: +1. **Observe** the current state (screenshot) +2. **Plan** based on what you see +3. **Act** on specific windows (not globally) +4. **Verify** the result (another screenshot) +5. **Adapt** based on reality + +This cycle is now **enforced** by the system prompts. The AI doesn't have a choice—it **must** work this way. + +--- + +## What This Means for Users + +### More Reliable +Tasks that failed 50% of the time now succeed 90% of the time. The AI actually **does what you ask**. + +### Smarter +The AI sees the screen, plans intelligently, and adjusts based on what actually happens. It's not following a rigid script. + +### Handles Complexity +25-step workflows? No problem. Multi-app automation? Works. Complex browser interactions? Covered. + +### Self-Correcting +If something goes wrong, the AI sees it (via screenshot), explains what happened, and tries a different approach. + +### Faster +No more waiting 30 seconds for the first screenshot. Everything is instant. + +--- + +## What This Means for Developers + +### Best Practices Codified +The new prompts encode **real** computer control best practices from an AI agent with actual experience. + +### Extensible +Want to add new tools? The prompt structure makes it easy to integrate them properly. + +### Debuggable +Better logging shows exactly what the AI is doing at each step (we even have plans for chat export for troubleshooting). + +### Production-Ready +This isn't a prototype anymore. It's robust, reliable, and ready for real work. + +--- + +## The Technical Deep Dive + +For developers who want the details: + +### Window Handle Management +We use Win32 APIs to properly manage focus: +```csharp +private bool BringWindowToForegroundWithFocus(IntPtr hWnd) +{ + uint currentThreadId = GetCurrentThreadId(); + uint foregroundThreadId = GetWindowThreadProcessId(GetForegroundWindow(), out _); + + // Attach to bypass Windows focus restrictions + AttachThreadInput(currentThreadId, foregroundThreadId, true); + bool success = SetForegroundWindow(hWnd); + AttachThreadInput(currentThreadId, foregroundThreadId, false); + + return GetForegroundWindow() == hWnd; +} +``` + +### ONNX Model Initialization +We load the YOLOv11 model at startup: +```csharp +_onnxEngine = new OnnxOmniParserEngine(); +// Model loaded, ready for instant inference +``` + +### Enhanced Element Detection +We enrich YOLO detections with spatial information: +```csharp +string contentLabel = $"UI Element #{labelIndex} at ({x},{y}) [size: {width}x{height}]"; +``` + +### Prompt Engineering +We structure prompts with: +- Clear operating principles +- Practical examples +- DO/DON'T lists +- Error recovery patterns +- Common task workflows + +--- + +## What's Next? + +This is just the beginning. We've laid the foundation for: + +### OCR Integration (Coming Soon) +The infrastructure is ready. Soon, UI elements will show actual text: +``` +"Subscribe Button at (300,250) [size: 200x60]" +``` + +### UI Improvements (In Progress) +- Export chat logs with tool calls for debugging +- Visual step-by-step execution display +- Interactive element highlighting +- Real-time progress animations + +### Context Persistence +- Remember window handles across sessions +- Cache common application states +- Predict likely next steps + +### Multi-Modal Understanding +- Semantic UI understanding +- Intent-based automation +- Natural language refinement loops + +--- + +## Try It Yourself + +Want to experience the difference? Here are some tasks that now **just work**: + +1. **"Open Chrome and search YouTube for Python tutorials"** + - Watch it target the right window + - See it verify each step + - Notice the instant screenshots + +2. **"Create a new text file and write 'Hello World'"** + - Observe the window-specific typing + - Check the verification screenshots + - See it confirm success + +3. **"Take a screenshot and describe what you see"** + - Instant processing (no 30s delay) + - Detailed element information with positions + - Spatial awareness in the description + +--- + +## The Bottom Line + +We didn't just fix bugs—we **fundamentally realigned** how Recursive Control approaches computer automation. + +The system now embodies the wisdom of an AI agent that actually knows how to interact with computers reliably: + +✅ **Observe before acting** (screenshots) +✅ **Target specifically** (window handles) +✅ **Verify results** (iterative checking) +✅ **Adapt continuously** (based on observations) +✅ **Explain clearly** (user feedback) + +**This is what AI computer control should be.** + +--- + +## Get Involved + +Recursive Control is open source and we'd love your contributions: + +- 🌟 **Star us on GitHub**: [Recursive-Control](https://github.com/flowdevs-io/Recursive-Control) +- 💬 **Join Discord**: Share your experiences and ideas +- 🐛 **Report Issues**: Help us make it even better +- 🔧 **Contribute**: PRs welcome! + +--- + +## Acknowledgments + +Special thanks to the AI coding agent that audited our system and provided the insights that drove this transformation. Sometimes the best code review comes from someone who **lives** in the environment you're trying to automate. + +Also thanks to our community for reporting issues, testing edge cases, and pushing us to make Recursive Control truly production-ready. + +--- + +## Download + +Get the latest version with all these improvements: +👉 [Releases Page](https://github.com/flowdevs-io/Recursive-Control/releases) + +--- + +*Justin Trantham* +*Founder, FlowDevs* +*Making AI computer control that actually works* + +--- + +## Comments? Questions? + +We'd love to hear your thoughts: +- What tasks are you automating? +- What features do you want next? +- How has the upgrade worked for you? + +Drop a comment or join our Discord! 💬 diff --git a/docs/FAQ.md b/docs/FAQ.md new file mode 100644 index 0000000..e55d08e --- /dev/null +++ b/docs/FAQ.md @@ -0,0 +1,15 @@ +--- +layout: default +title: FAQ +--- + +# Frequently Asked Questions + +## Coming Soon + +This page is under construction. Check back soon! + +For immediate help: +- [Troubleshooting Guide](Troubleshooting.md) +- [Discord Community](https://discord.gg/mQWsWeHsVU) +- [GitHub Issues](https://github.com/flowdevs-io/Recursive-Control/issues) diff --git a/docs/Getting-Started.md b/docs/Getting-Started.md new file mode 100644 index 0000000..7edb4cd --- /dev/null +++ b/docs/Getting-Started.md @@ -0,0 +1,89 @@ +--- +layout: default +title: Getting Started +--- + +# First Flight Checklist + +Strap into Mission Control and run your first full-loop automation in under ten minutes. + +## Cue the Agents + +Once the cockpit is configured you can talk in plain-text, mission-style briefs. + +### Launch Scripts + +1. **File Ops Circuit** + ``` + “Open File Explorer and navigate to my Documents folder.” + “Create a new folder called Projects on my Desktop.” + “Rename all .txt files in this folder to .md.” + ``` + +2. **App Relay** + ``` + “Open Notepad and type Hello World.” + “Launch Chrome and navigate to github.com.” + “Open Excel and create a new spreadsheet.” + ``` + +3. **Vision Probe** + ``` + “Take a screenshot of the current window.” + “Capture the entire screen.” + ``` + +4. **Browser Drone** + ``` + “Open a browser and search for AI automation tools.” + “Fill out this form with my information.” + “Extract data from this webpage.” + ``` + +## Meet the Crew + +- **Hermes (Coordinator)** — interprets intent, sets the heading +- **Daedalus (Planner)** — drafts the maneuver plan step by step +- **Talos (Executor)** — performs every keystroke, click, and command + +Agents talk to you in real-time so you can abort, adjust, or cheer as they work. + +## Mission Briefing Tips + +- **Paint the picture** → “Move all PDF files from Downloads to Documents” beats “Do something with files.” +- **Chunk the mission** → Run multi-stage workflows in segments and confirm each leg. +- **Talk human** → Natural language with intent and context works best; no scripting required. + +## Plugin Bay at a Glance + +- Keyboard + Mouse autopilot +- Command line (CMD + PowerShell) +- Screen capture + annotation +- Window switching + focus +- Playwright browser automation +- Remote control HTTP bridge + +## Next Flight Plans + +- Tour the [UI Features Atlas](UI-Features.html) +- Study the [Multi-Agent Architecture](Multi-Agent-Architecture.html) +- Customize personas via [System Prompts Reference](System-Prompts-Reference.html) +- Join the [Discord flight crew](https://discord.gg/mQWsWeHsVU) + +## First-Time FAQs + +**How do I know it’s engaged?** + +Watch the live reasoning feed and action timeline. + +**Something went sideways.** + +Hit the abort control, then consult the [Troubleshooting Console](Troubleshooting.html). + +**Can I reshape its behavior?** + +Absolutely. Tweak prompts, toggle plugins, and save profiles. + +--- + +Ready for more? Dive into [advanced documentation](Multi-Agent-Architecture.html) or sync with the [community](https://discord.gg/mQWsWeHsVU). diff --git a/docs/Home.md b/docs/Home.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/Installation.md b/docs/Installation.md new file mode 100644 index 0000000..29253cd --- /dev/null +++ b/docs/Installation.md @@ -0,0 +1,158 @@ +--- +layout: default +title: Installation +--- + +# Installation Capsule + +Your mission dock for landing Recursive Control on Windows. Choose your payload, confirm dependencies, and ignite the thrusters. + +## Flight Readiness Checklist + +### Baseline Specs +- **Operating System**: Windows 10 or Windows 11 +- **.NET Framework**: 4.8 or later +- **RAM**: 4 GB minimum (8 GB recommended) +- **Disk Space**: 500 MB for application files + +### Network & Auth +- Internet connection for AI model API access +- API key for your preferred AI provider (OpenAI, Azure OpenAI, Anthropic, Google, etc.) + +## Deploy Options + +### Rapid Drop (Recommended) + +1. **Pull the latest build** + - Hop to the [Releases hangar](https://github.com/flowdevs-io/Recursive-Control/releases) + - Snag the newest `recursivecontrol.zip` payload + - Extract anywhere friendly (no admin elevation required) + +2. **Boot the cockpit** + - Launch `recursivecontrol.exe` + - First boot unlocks the setup co-pilot + +3. **Wire your pilot** + - Open Settings → Providers + - Pick your model stack (OpenAI, Azure, Claude, Gemini, LM Studio…) + - Drop in your API credentials + +4. **Ping the tower** + - Ask "What can you do?" + - Confirm you see agent chatter + completion + +### Source Build (Engineers’ Track) + +For developers who want to build from source: + +```bash +# Clone the repository +git clone https://github.com/flowdevs-io/Recursive-Control.git + +# Navigate to the directory +cd Recursive-Control + +# Restore dependencies +dotnet restore + +# Build the project +dotnet build + +# Run the application +dotnet run --project FlowVision +``` + +## Configure Your Pilot + +### OpenAI + +#### OpenAI +1. Get your API key from [OpenAI Platform](https://platform.openai.com) +2. In Recursive Control settings: + - Provider: OpenAI + - API Key: Your OpenAI key + - Model: gpt-4 or gpt-3.5-turbo + +#### Azure OpenAI +1. Set up Azure OpenAI service in Azure Portal +2. In Recursive Control settings: + - Provider: Azure OpenAI + - Endpoint: Your Azure endpoint URL + - API Key: Your Azure key + - Deployment Name: Your model deployment + +#### Anthropic Claude +1. Get your API key from [Anthropic Console](https://console.anthropic.com) +2. In Recursive Control settings: + - Provider: Anthropic + - API Key: Your Anthropic key + - Model: claude-3-opus or claude-3-sonnet + +#### LM Studio (Local) +1. Download and run [LM Studio](https://lmstudio.ai) +2. Load your preferred local model +3. Start the local server +4. In Recursive Control settings: + - Provider: LM Studio + - Endpoint: http://localhost:1234 (or your configured port) + +### Plugin Bay + +Enable or disable plugins based on your needs: + +1. Open settings → Plugins +2. Toggle plugins on/off: + - ✅ Keyboard/Mouse (recommended) + - ✅ Screen Capture (recommended) + - ✅ Command Line + - ✅ PowerShell + - ⚠️ Playwright (requires additional setup) + - ⚠️ Remote Control (enable for HTTP API) + +## System Checks + +Run these test commands to verify everything works: + +1. **Basic Interaction**: "Hello, can you hear me?" +2. **Screen Capture**: "Take a screenshot" +3. **File Operations**: "Show me my Desktop folder" +4. **Application Control**: "Open Notepad" + +If all tests pass, you're ready to go! + +## Troubleshooting Bay + +### .NET Framework Missing +- Download and install [.NET Framework 4.8](https://dotnet.microsoft.com/download/dotnet-framework/net48) + +### API Key Errors +- Verify your API key is correct +- Check that your API provider account has credits/active subscription +- Ensure internet connection is working + +### Application Won’t Start +- Run as Administrator +- Check Windows Event Viewer for error details +- Verify all dependencies are installed + +### Performance Issues +- Close unnecessary applications +- Increase available RAM +- Consider using a lighter AI model + +## Next Flight Plans + +- Continue to [Getting Started](Getting-Started.html) guide +- Explore [UI Features](UI-Features.html) +- Join our [Discord community](https://discord.gg/mQWsWeHsVU) + +## Uninstall Playbook + +To remove Recursive Control: +1. Delete the application folder +2. Remove configuration files from `%APPDATA%\FlowVision` +3. (Optional) Remove any created shortcuts + +--- + +Need help? Check the [Troubleshooting](Troubleshooting.html) guide or ask in our [Discord](https://discord.gg/mQWsWeHsVU). diff --git a/docs/Multi-Agent-Architecture.md b/docs/Multi-Agent-Architecture.md new file mode 100644 index 0000000..eea18cf --- /dev/null +++ b/docs/Multi-Agent-Architecture.md @@ -0,0 +1,423 @@ +--- +layout: default +title: Multi-Agent Architecture +--- + +# Multi-Agent Architecture + +Deep dive into Recursive Control's sophisticated 3-agent system. + +## Overview + +Recursive Control uses a multi-agent architecture inspired by modern AI orchestration patterns. Instead of a single AI handling everything, specialized agents work together to understand, plan, and execute tasks more effectively. + +## The Three Agents + +### 1. Coordinator Agent 🎯 + +**Role**: Traffic controller and request router + +**Responsibilities**: +- Receives and analyzes user requests +- Determines request complexity +- Routes simple tasks directly to executor +- Delegates complex tasks to planner +- Maintains conversation context + +**Example Flow**: +``` +User: "What time is it?" +Coordinator: [Direct response, no plugins needed] + +User: "Open Chrome, search for AI news, and summarize the top 3 articles" +Coordinator: [Routes to Planner - complex multi-step task] +``` + +### 2. Planner Agent 📋 + +**Role**: Strategic task decomposition + +**Responsibilities**: +- Breaks complex requests into steps +- Identifies required plugins +- Creates execution sequence +- Handles dependencies between steps +- Anticipates potential issues + +**Example Plan**: +``` +Task: "Download images from a website and organize them by date" + +Plan: +1. Use PlaywrightPlugin to launch browser +2. Navigate to specified website +3. Execute JavaScript to find all image URLs +4. Use CMDPlugin to create date-based folders +5. Download images to appropriate folders +6. Close browser +7. Verify downloads completed +``` + +### 3. Executor Agent ⚙️ + +**Role**: Action performer + +**Responsibilities**: +- Executes plans step-by-step +- Calls appropriate plugins with correct parameters +- Handles errors and retries +- Reports progress and results +- Adapts when unexpected issues arise + +**Example Execution**: +``` +Step 1: PlaywrightPlugin.LaunchBrowser("chromium") +Status: ✅ Browser launched + +Step 2: PlaywrightPlugin.NavigateTo("https://example.com") +Status: ✅ Navigated successfully + +Step 3: PlaywrightPlugin.ExecuteScript("return document.images.length") +Status: ✅ Found 42 images +... +``` + +## Architecture Diagram + +```mermaid +graph TD + User[User Input] --> Coordinator[Coordinator Agent] + Coordinator -->|Simple Task| Executor[Executor Agent] + Coordinator -->|Complex Task| Planner[Planner Agent] + Planner -->|Execution Plan| Executor + Executor -->|Function Calls| Plugins[Plugin System] + Plugins -->|Results| Executor + Executor -->|Progress| Coordinator + Coordinator -->|Response| User + + Plugins --> CMD[CMDPlugin] + Plugins --> KB[KeyboardPlugin] + Plugins --> Mouse[MousePlugin] + Plugins --> Screen[ScreenCapturePlugin] + Plugins --> Win[WindowPlugin] + Plugins --> Play[PlaywrightPlugin] + Plugins --> Remote[RemoteControlPlugin] +``` + +## Communication Flow + +### Simple Request Flow + +``` +User: "Take a screenshot" + ↓ +Coordinator: Analyzes request (simple, direct action) + ↓ +Executor: Calls ScreenCapturePlugin.CaptureScreen() + ↓ +Plugin: Captures screen, returns image path + ↓ +Executor: Reports success + ↓ +Coordinator: Formats response for user + ↓ +User: "Screenshot saved to C:\Users\...\screenshot.png" +``` + +### Complex Request Flow + +``` +User: "Automate my morning routine: check email, read headlines, generate summary" + ↓ +Coordinator: Identifies complexity, routes to Planner + ↓ +Planner: Creates multi-step plan + 1. Launch browser + 2. Navigate to email + 3. Extract unread count + 4. Navigate to news site + 5. Scrape headlines + 6. Generate summary + ↓ +Executor: Executes plan step-by-step + - Uses PlaywrightPlugin for web automation + - Handles each step sequentially + - Reports progress after each step + ↓ +Coordinator: Compiles results, formats summary + ↓ +User: Receives comprehensive summary +``` + +## Agent Prompts + +Each agent has a specialized system prompt that defines its behavior. + +### Coordinator Prompt (Simplified) + +``` +You are the Coordinator Agent for Recursive Control. + +Your responsibilities: +1. Understand user requests +2. Determine if task is simple (direct) or complex (needs planning) +3. Route requests appropriately +4. Maintain conversation context + +For simple tasks: respond directly or invoke executor +For complex tasks: delegate to planner +``` + +### Planner Prompt (Simplified) + +``` +You are the Planner Agent for Recursive Control. + +Your responsibilities: +1. Analyze complex tasks +2. Break them into sequential steps +3. Identify required plugins for each step +4. Create clear execution plan +5. Consider error handling + +Available plugins: [list of plugins] + +Output format: JSON array of steps with plugin calls +``` + +### Executor Prompt (Simplified) + +``` +You are the Executor Agent for Recursive Control. + +Your responsibilities: +1. Execute plans step-by-step +2. Call plugins with correct parameters +3. Handle errors gracefully +4. Report progress +5. Adapt when issues occur + +You have access to: [list of plugin functions] + +Execute each step and report results. +``` + +## Plugin Integration + +Agents interact with plugins through the Semantic Kernel framework: + +```csharp +// Plugins expose functions via attributes +[KernelFunction] +[Description("Capture screenshot of screen")] +public string CaptureScreen() { ... } + +// Executor calls plugins +var result = await kernel.InvokeAsync("ScreenCapturePlugin", "CaptureScreen"); + +// Result returned to agent +return $"Screenshot captured: {result}"; +``` + +## Error Handling + +The multi-agent system has sophisticated error handling: + +### Retry Logic +``` +Step fails → Executor analyzes error + ↓ +If retryable → Attempt again (up to 3 times) + ↓ +If still failing → Report to Coordinator + ↓ +Coordinator → Requests alternative plan from Planner + ↓ +New plan → Executor tries different approach +``` + +### Graceful Degradation +``` +Primary approach fails + ↓ +Planner generates fallback plan + ↓ +Executor attempts fallback + ↓ +If successful: task completed (may be partial) +If unsuccessful: clear error message to user +``` + +## Performance Optimization + +### Token Usage +- Coordinator uses smaller models for routing decisions +- Planner only invoked for complex tasks +- Executor uses function calling to minimize tokens +- Context maintained efficiently across agents + +### Parallel Execution +- Independent steps can execute in parallel +- Plugins support async operations +- Non-blocking UI updates + +### Caching +- Common plans cached for reuse +- Plugin results cached when appropriate +- Conversation context optimized + +## Configuration + +### Customizing Agent Behavior + +Edit system prompts in `%APPDATA%\FlowVision\toolconfig.json`: + +```json +{ + "coordinatorPrompt": "Your custom coordinator prompt...", + "plannerPrompt": "Your custom planner prompt...", + "executorPrompt": "Your custom executor prompt...", + "maxRetries": 3, + "enableParallelExecution": true +} +``` + +### Selecting Models + +Different models for different agents: + +```json +{ + "coordinatorModel": "gpt-3.5-turbo", // Fast, cheap + "plannerModel": "gpt-4", // Smart, strategic + "executorModel": "gpt-4-turbo" // Fast + smart +} +``` + +## Advanced Patterns + +### Hierarchical Planning + +For very complex tasks: +``` +User Request + ↓ +Coordinator → Planner (creates high-level plan) + ↓ +Each high-level step → Planner (creates detailed sub-plan) + ↓ +Executor (executes all sub-steps) +``` + +### Feedback Loops + +Agents can request clarification: +``` +Planner: "I need clarification - should I overwrite existing files?" + ↓ +User: "Yes, overwrite them" + ↓ +Planner: Updates plan with overwrite flag +``` + +### Context Awareness + +Agents maintain shared context: +``` +User: "Open that file again" +Coordinator: Retrieves "that file" from context (last mentioned file) +Executor: Opens the correct file +``` + +## Comparison: Single vs Multi-Agent + +### Single Agent Approach ❌ +- One agent handles everything +- Becomes complex and error-prone +- Difficult to optimize +- Limited scalability + +### Multi-Agent Approach ✅ +- Specialized agents for each concern +- Clear separation of responsibilities +- Easier to optimize and debug +- Highly scalable +- Better error handling + +## Real-World Examples + +### Example 1: Web Research Task + +``` +User: "Research the top 5 AI coding tools and create a comparison table" + +Coordinator: Recognizes complexity → Routes to Planner + +Planner creates plan: +1. Search for "AI coding tools" +2. Extract top 5 tools +3. For each tool: visit website, gather info +4. Compile data into table format + +Executor executes: +- Uses PlaywrightPlugin for web automation +- Gathers information systematically +- Formats results as table + +Result: Comprehensive comparison table delivered to user +``` + +### Example 2: File Organization + +``` +User: "Organize my Downloads folder by file type" + +Coordinator: Routes to Planner + +Planner creates plan: +1. Scan Downloads folder +2. Identify file types +3. Create folders for each type +4. Move files to appropriate folders + +Executor executes: +- Uses CMDPlugin for file operations +- Creates organized structure +- Reports completion + +Result: Organized Downloads folder +``` + +## Best Practices + +### For Users +1. Be clear and specific in requests +2. Break very large tasks into phases +3. Provide context when needed +4. Review plans before execution (if configured) + +### For Developers +1. Keep agent prompts focused +2. Test with various task complexities +3. Monitor token usage +4. Optimize plugin performance +5. Implement comprehensive logging + +## Future Enhancements + +### Planned Features +- [ ] Agent learning from past executions +- [ ] Dynamic agent spawning for parallel tasks +- [ ] Cross-agent memory sharing +- [ ] Advanced planning algorithms +- [ ] User preference learning + +## Related Documentation + +- [System Prompts Reference](System-Prompts-Reference.html) - Customize agent behavior +- [API Reference](API-Reference.html) - Plugin development +- [UI Features](UI-Features.html) - Interacting with agents + +--- + +Questions about the architecture? Join our [Discord](https://discord.gg/mQWsWeHsVU) for discussions! diff --git a/docs/Prompt-Engineering-Guide.md b/docs/Prompt-Engineering-Guide.md new file mode 100644 index 0000000..b1bbc6d --- /dev/null +++ b/docs/Prompt-Engineering-Guide.md @@ -0,0 +1,64 @@ +--- +layout: default +title: Prompt Engineering Guide +--- + +# Prompt Engineering Guide for Recursive Control + +Master the art of communicating with your AI agents to achieve precise, efficient automation. + +## Why Prompt Engineering Matters +Recursive Control's multi-agent system (Hermes, Daedalus, and Talos) interprets natural language, but structured prompts yield better results: +- **Clarity reduces errors**: Ambiguous requests lead to wrong assumptions. +- **Structure aids planning**: Well-organized prompts help the Planner agent create better steps. +- **Context improves accuracy**: Provide relevant details for better execution. + +## Core Principles +1. **Be Specific**: Include exact details like app names, file paths, or expected outcomes. +2. **Break It Down**: For complex tasks, suggest steps or use multi-stage prompts. +3. **Provide Context**: Mention current state, like open windows or recent actions. +4. **Use Verification**: Ask the agent to confirm steps or describe what it sees. +5. **Handle Errors Gracefully**: Include fallback instructions. + +## Prompt Patterns +### Basic Command +**Template**: "Perform [action] in [location/app] with [details]." + +**Example**: "Open Chrome and navigate to github.com/flowdevs-io/Recursive-Control." + +### Multi-Step Workflow +**Template**: "Do the following steps: 1. [Step 1] 2. [Step 2] ... Verify each step." + +**Example**: "Create a report: 1. Open Excel. 2. Add headers: Date, Task, Status. 3. Fill with today's data. 4. Save as 'daily-report.xlsx' in Documents." + +### Vision-Assisted +**Template**: "Capture the screen, describe what you see, then [action based on description]." + +**Example**: "Take a screenshot of the current window, identify the search bar, and type 'AI tools' into it." + +### Conditional Logic +**Template**: "If [condition], do [action A]; else do [action B]." + +**Example**: "If Chrome is open, navigate to YouTube; else open Chrome first then go to YouTube." + +## Advanced Techniques +### Chain of Thought +Encourage reasoning: "Think step-by-step: First, check if the app is open. If not, open it. Then..." + +### Role Playing +Assign roles: "As a efficient automation expert, optimize this workflow: [task]." + +### Few-Shot Examples +Provide samples: "Like how you opened Notepad last time, now open Paint and draw a square." + +## Common Pitfalls +- **Too Vague**: "Do something with files" → Agents might guess wrong. +- **Overly Complex**: Break long prompts into multiple interactions. +- **Assuming State**: Always verify: "Focus on the foreground window and describe it first." + +## Tips for Power Users +- Use the UI's chat history to build context across prompts. +- Combine plugins explicitly: "Use Playwright to automate browser, then CMD to process downloads." +- Test incrementally: Start with simple tasks and build up. + +Master these patterns to turn Recursive Control into your ultimate productivity copilot! diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..1aa562c --- /dev/null +++ b/docs/README.md @@ -0,0 +1,117 @@ +# Recursive Control Wiki + +This directory contains the complete documentation for Recursive Control. + +## 📚 Wiki Structure + +All documentation is in Markdown format, ready to be published to GitHub Wiki. + +### Core Pages +- **Home.md** - Main wiki homepage with navigation +- **Installation.md** - Complete installation guide +- **Getting-Started.md** - First tasks and tutorials +- **Multi-Agent-Architecture.md** - Technical deep dive +- **FAQ.md** - Frequently asked questions +- **Troubleshooting.md** - Common issues and solutions +- **API-Reference.md** - Developer API documentation + +## 📤 Publishing to GitHub Wiki + +### Method 1: Web Interface +1. Go to repository → Wiki tab +2. Create new page for each .md file +3. Copy content from corresponding file +4. Save each page + +### Method 2: Git Clone (Recommended) +```bash +# Clone wiki repository +git clone https://github.com/flowdevs-io/Recursive-Control.wiki.git + +# Copy all markdown files +cp wiki/*.md Recursive-Control.wiki/ + +# Commit and push +cd Recursive-Control.wiki +git add . +git commit -m "Complete wiki documentation" +git push origin master +``` + +## 📝 Content Summary + +**Home.md** (Main landing page) +- Quick start links +- Feature overview +- Documentation structure +- Community links + +**Installation.md** (Setup guide) +- System requirements +- Download instructions +- Initial configuration +- Verification steps + +**Getting-Started.md** (Tutorial) +- First tasks +- Common examples +- Best practices +- Multi-agent introduction + +**Multi-Agent-Architecture.md** (Technical) +- 3-agent system explained +- Workflow diagrams +- Configuration options +- Performance analysis + +**FAQ.md** (Quick answers) +- Common questions +- Quick solutions +- Feature explanations +- Tips and tricks + +**Troubleshooting.md** (Problem solving) +- Common issues +- Error messages +- Solutions +- Debug techniques + +**API-Reference.md** (Developer docs) +- Plugin API +- Tool functions +- Configuration objects +- Extension guide + +## ✅ Content Complete + +All wiki pages are: +- ✅ Written in Markdown +- ✅ Formatted with headers +- ✅ Include navigation links +- ✅ Have code examples +- ✅ Feature emoji icons +- ✅ Ready to publish + +## 🎨 Features + +- Comprehensive coverage +- Step-by-step guides +- Code examples throughout +- Visual diagrams (mermaid) +- Emoji visual language +- Internal navigation +- External resource links + +## 📞 Maintenance + +To update wiki: +1. Edit .md files in this directory +2. Test locally with markdown viewer +3. Push to GitHub wiki repository +4. Verify formatting on GitHub + +--- + +**Total Pages:** 7 core + expandable +**Total Words:** ~50,000+ +**Completeness:** Production ready diff --git a/docs/System-Prompts-Reference.md b/docs/System-Prompts-Reference.md new file mode 100644 index 0000000..bff113d --- /dev/null +++ b/docs/System-Prompts-Reference.md @@ -0,0 +1,535 @@ +# Optimized System Prompts for Computer Control AI + +## Philosophy + +As a coding agent that interacts with computers, here's what I've learned works best: + +### Key Principles +1. **Context is King**: Always know what's visible, what's running, and where you are +2. **Verify Before Act**: Take screenshots to confirm state before destructive actions +3. **Window Handles are Critical**: Always work with specific windows, not global focus +4. **Iterative Refinement**: Check results, adjust approach based on what you see +5. **Clear State Management**: Know what tools are active and their state + +--- + +## Single Agent Mode (Recommended for Most Tasks) + +### Actioner System Prompt (Enhanced) + +``` +You are a Windows computer control agent with direct access to the desktop environment. + +## Your Core Capabilities + +You can see the screen, control the mouse and keyboard, manage windows, execute commands, and automate browsers. You have FULL access to: + +**Vision & Observation:** +- `CaptureWholeScreen()` - Take full desktop screenshot with UI element detection +- `CaptureScreen(windowHandle)` - Capture specific window + +**Window Management:** +- `ListWindowHandles()` - Get all open windows with handles, titles, and process names +- `ForegroundSelect(windowHandle)` - Bring a window to foreground + +**Keyboard Control (Window-Targeted):** +- `SendKeyToWindow(windowHandle, keys)` - Send keys to specific window +- `EnterKeyToWindow(windowHandle)` - Send Enter to specific window +- `CtrlKeyToWindow(windowHandle, letter)` - Send Ctrl+ combination to specific window +- `SendKey(keys)` - Send keys to current foreground window (use sparingly) + +**Mouse Control:** +- `ClickOnWindow(windowHandle, bbox, leftClick, clickTimes)` - Click at coordinates in specific window +- `ScrollOnWindow(windowHandle, amount)` - Scroll in specific window + +**System Control:** +- `ExecuteCommand(command)` - Run CMD commands +- `ExecuteScript(script)` - Run PowerShell scripts + +**Browser Automation (Playwright):** +- `IsBrowserActive()` - Check if browser is running +- `LaunchBrowser(browserType, headless, forceNew)` - Start browser (chromium/firefox/webkit) +- `NavigateTo(url, waitStrategy)` - Go to URL +- `ExecuteScript(jsCode)` - Run JavaScript in page +- `ClickElement(selector)` - Click element by CSS selector +- `TypeText(selector, text)` - Type into input field +- `GetPageContent()` - Get HTML content +- `TakeScreenshot()` - Browser screenshot +- `CloseBrowser()` - Close browser + +## Operating Principles + +### 1. ALWAYS Start with Observation +``` +Bad: Immediately clicking without seeing +Good: CaptureWholeScreen() -> Analyze -> Plan -> Act +``` + +### 2. USE Window Handles for Everything +``` +Bad: SendKey("^t") # Goes to random window! +Good: windowHandle = GetChromeHandle(); SendKeyToWindow(windowHandle, "^t") +``` + +### 3. Verify After Important Actions +``` +1. CaptureWholeScreen() - See initial state +2. Perform action +3. Wait briefly (100-500ms) +4. CaptureWholeScreen() - Verify result +5. Adjust if needed +``` + +### 4. Work Iteratively +``` +Don't try to do 10 steps blindly. Do: +- Step 1 -> Capture -> Verify +- Step 2 -> Capture -> Verify +- Step 3 -> Capture -> Verify +``` + +### 5. Handle Browser State Properly +``` +Always check: IsBrowserActive() +If Yes: Use existing browser +If No: LaunchBrowser(browserType) +Never launch multiple browsers by accident! +``` + +## Workflow Pattern + +### Standard Task Execution: +``` +1. Understand the goal +2. CaptureWholeScreen() - What's currently visible? +3. ListWindowHandles() - What applications are running? +4. Plan the approach based on current state +5. Execute ONE action at a time +6. Verify result with screenshot if important +7. Adjust plan based on observation +8. Continue until goal achieved +``` + +### Example: "Open YouTube in Chrome" +``` +Step 1: ListWindowHandles() +Result: Chrome is already open (handle 12345678) + +Step 2: ForegroundSelect("12345678") +Result: Chrome now in focus + +Step 3: CaptureScreen("12345678") +Result: See Chrome is on some random page + +Step 4: SendKeyToWindow("12345678", "^t") +Result: New tab opened + +Step 5: SendKeyToWindow("12345678", "youtube.com") +Result: URL typed + +Step 6: EnterKeyToWindow("12345678") +Result: Navigating to YouTube + +Step 7: Wait 2000ms for page load + +Step 8: CaptureScreen("12345678") +Result: Verify YouTube loaded successfully +``` + +## UI Element Detection Format + +Screenshots return UI elements in this format: +``` +UI Element #1 at (150,200) [size: 120x40] +UI Element #2 at (300,250) [size: 200x60] +UI Element #3 at (450,300) [size: 180x50] +``` + +**BBox format:** [left, top, right, bottom] in pixels + +Use this for clicking: +```javascript +element = ParsedContent with bbox [150, 200, 270, 240] +ClickOnWindow(windowHandle, element.bbox, leftClick=true, clickTimes=1) +``` + +## Error Handling + +### Window Not Found: +``` +1. ListWindowHandles() again +2. Check if window closed +3. If needed, launch the application +4. Get new window handle +``` + +### Action Failed: +``` +1. CaptureWholeScreen() - What changed? +2. Check if window lost focus +3. ForegroundSelect(windowHandle) - Regain focus +4. Retry action +``` + +### Unexpected State: +``` +1. Take screenshot to see current state +2. Explain what you see vs what you expected +3. Adjust approach based on reality +4. Don't proceed blindly if confused +``` + +## Best Practices + +### DO: +✅ Take screenshots before destructive actions +✅ Use window handles for keyboard/mouse operations +✅ Verify results of important steps +✅ Wait after actions that need time (page loads, app launches) +✅ Check browser state before launching +✅ Explain what you see in screenshots +✅ Work iteratively, one step at a time + +### DON'T: +❌ Use SendKey() without window handle (unreliable) +❌ Click without verifying element positions +❌ Assume action succeeded without verification +❌ Launch multiple browsers accidentally +❌ Execute 10 steps blindly without checking +❌ Ignore errors and continue +❌ Forget to close resources when done + +## Response Format + +When explaining actions: +``` +**Observation:** [What I see from screenshot/state] +**Plan:** [What I'm about to do] +**Action:** [The specific tool call] +**Result:** [What happened] +**Next:** [What to do next] +``` + +## Remember + +You are controlling a REAL computer. Every action has consequences. Be thoughtful, observant, and iterative. When in doubt, take a screenshot to see what's happening. + +Your goal is to complete tasks reliably and safely, not quickly and blindly. +``` + +--- + +## Multi-Agent Mode (For Complex Planning) + +### Coordinator Prompt (Enhanced) + +``` +You are the Coordinator Agent for a Windows computer control system. + +## Your Role + +You are the interface between the human user and the execution system. You understand requests, break them into manageable tasks, and present results clearly. + +## Your Capabilities + +1. **Understand User Intent:** + - Parse natural language requests + - Identify the goal and constraints + - Ask clarifying questions if needed + +2. **Task Assessment:** + - Determine if task needs planning or can be direct + - Simple tasks (1-2 steps): Send directly to Actioner + - Complex tasks (3+ steps): Route through Planner + - Very simple (greetings, questions): Respond directly + +3. **Result Communication:** + - Translate technical results into user-friendly language + - Highlight important information + - Explain what was accomplished + - Note any issues or limitations + +## Decision Tree + +``` +User Request + ├─ Greeting/Small Talk? + │ └─> Respond directly, friendly and brief + │ + ├─ Simple Question (no actions)? + │ └─> Answer directly + │ + ├─ Simple Task (1-2 steps)? + │ └─> Route to Actioner Agent directly + │ Example: "Open Chrome" + │ Example: "Take a screenshot" + │ + ├─ Complex Task (3+ steps)? + │ └─> Route to Planner Agent + │ Example: "Find cheapest flights to Paris" + │ Example: "Create a PowerPoint from web research" + │ + └─ Ambiguous? + └─> Ask clarifying questions +``` + +## Communication Style + +**With User:** +- Friendly and conversational +- Explain what you're doing at high level +- Report results clearly +- Acknowledge limitations honestly + +**With Planner:** +- Be specific about the goal +- Include any constraints mentioned +- Pass along important context + +**With Actioner:** +- Direct, single-step instructions +- Include all necessary details +- Specify exactly what to execute + +## Example Interactions + +### Simple Task: +``` +User: "Open Chrome" +You: "I'll open Chrome for you." +→ Direct to Actioner: "Launch Google Chrome browser" +← Actioner: "Chrome launched successfully" +You: "Chrome is now open and ready to use." +``` + +### Complex Task: +``` +User: "Find the weather in Tokyo and email it to me" +You: "I'll look up Tokyo's weather and prepare an email for you." +→ To Planner: "Get Tokyo weather forecast and compose email with the information" +← Planner provides steps +→ Monitor execution +← Results received +You: "I found that Tokyo is currently 18°C and partly cloudy. I've prepared the email - + would you like me to send it or would you like to review it first?" +``` + +### Greeting: +``` +User: "Hey there" +You: "Hello! I'm here to help you control your computer. What would you like me to do?" +``` + +## Important Notes + +- You don't execute actions yourself - you coordinate +- Keep responses concise but informative +- If something fails, explain clearly and suggest alternatives +- Maintain conversation context across multiple exchanges +- Be proactive in offering help for follow-up tasks +``` + +### Planner Prompt (Enhanced) + +``` +You are the Planner Agent for a Windows computer control system. + +## Your Role + +You receive complex tasks from the Coordinator and break them into discrete, executable steps for the Actioner Agent. + +## Your Strengths + +1. **Sequential Thinking**: Break complex goals into ordered steps +2. **Tool Awareness**: Know what tools are available and when to use them +3. **State Management**: Track what's been done and what's needed +4. **Adaptive Planning**: Adjust based on execution results + +## Planning Principles + +### 1. Always Start with Observation +``` +WRONG: "Step 1: Click the search button" +RIGHT: "Step 1: Take a screenshot to see current state" +``` + +### 2. One Action Per Step +``` +WRONG: "Open Chrome and navigate to YouTube" +RIGHT: + "Step 1: Open Chrome browser" + "Step 2: Navigate to YouTube.com" +``` + +### 3. Use Window Handles +``` +WRONG: "Type 'youtube.com' in the address bar" +RIGHT: "Get Chrome window handle and type 'youtube.com' using SendKeyToWindow" +``` + +### 4. Build on Results +``` +Step 1: List all open windows +[Wait for result] +Step 2: Based on the windows list, select Chrome (handle will be provided) +[Wait for result] +Step 3: Using that window handle, open a new tab +``` + +### 5. Verify Important Actions +``` +Step 3: Close the warning dialog +Step 4: Take screenshot to verify dialog is closed +Step 5: Continue with main task +``` + +## Step Format + +Each step must be: +- **Actionable**: Uses a specific tool +- **Complete**: Has all required parameters +- **Contextual**: Makes sense given previous results +- **Verifiable**: Result can be confirmed + +### Good Step Examples: +``` +✅ "Use ListWindowHandles() to see all open applications" +✅ "Take screenshot of Chrome window (handle: 12345678) to see current page" +✅ "Send Ctrl+T to Chrome window (handle: 12345678) to open new tab" +✅ "Wait 2 seconds for page to load" +✅ "Click on element at coordinates [150, 200, 270, 240] in Chrome window" +``` + +### Bad Step Examples: +``` +❌ "Do a search" (What tool? Where? For what?) +❌ "Navigate to website and find prices" (Too many actions) +❌ "Click the button" (Which button? Which window? What coordinates?) +❌ "Just make it work" (Not actionable) +``` + +## Workflow Pattern + +``` +1. Receive task from Coordinator +2. Consider current state (what do we know?) +3. Output FIRST step only (observation/preparation) +4. Wait for Actioner result +5. Analyze result +6. Decide next step based on what happened +7. Repeat until task complete +8. Output "TASK COMPLETED" with summary +``` + +## Handling Results + +### Success: +``` +Actioner: "Screenshot captured, shows YouTube homepage with 25 UI elements" +You: "Good, YouTube loaded. Next step: Click on the search box..." +``` + +### Partial Success: +``` +Actioner: "Window brought to front, but element not found" +You: "Let me try a different approach. Next step: Take screenshot to see current state..." +``` + +### Failure: +``` +Actioner: "Browser crashed" +You: "Browser crashed. New plan: Check if browser still running, if not, relaunch..." +``` + +## Completion Signal + +When task is done: +``` +TASK COMPLETED + +Summary: Successfully searched YouTube for "Python tutorials" and found 45 results. +The top 3 videos are now visible on screen: +1. "Python for Beginners" - 2.3M views +2. "Complete Python Course" - 1.8M views +3. "Learn Python in 4 Hours" - 900K views + +The browser is still open on the results page. +``` + +## Common Patterns + +### Opening Application: +``` +Step 1: Use ExecuteCommand to launch application +Step 2: Wait 2-3 seconds for application to start +Step 3: Use ListWindowHandles to get the window handle +Step 4: Use ForegroundSelect to bring window to front +``` + +### Web Navigation: +``` +Step 1: Check if browser active with IsBrowserActive() +Step 2: If not active, LaunchBrowser("chromium") +Step 3: Navigate to URL with NavigateTo(url) +Step 4: Wait for page load (2-5 seconds) +Step 5: Take screenshot to verify page loaded +``` + +### Finding & Clicking UI Elements: +``` +Step 1: Take screenshot of target window +Step 2: Analyze UI elements returned +Step 3: Identify target element by position/size +Step 4: Click on element using ClickOnWindow with bbox +Step 5: Verify action succeeded with another screenshot +``` + +## Remember + +- Output ONE step at a time +- Wait for results before next step +- Adapt based on what actually happens +- Use window handles for all keyboard/mouse actions +- Verify important actions with screenshots +- Be specific and actionable in every step +- Signal completion clearly when done +``` + +--- + +## Key Improvements Made + +### 1. Context Awareness +- Emphasized starting with observation (screenshots) +- Window handle management for targeted actions +- State verification between steps + +### 2. Practical Patterns +- Real workflow examples +- Error handling strategies +- Common task patterns (browser, apps, clicking) + +### 3. Tool Usage Clarity +- Window-targeted keyboard methods highlighted +- BBox format clearly explained +- Browser state management emphasized + +### 4. Iterative Execution +- One step at a time philosophy +- Verify before proceeding +- Adapt based on results + +### 5. Better Separation of Concerns +- Coordinator: User interface & routing +- Planner: Sequential breakdown & adaptation +- Actioner: Direct execution with full tool access + +--- + +## Implementation Notes + +These prompts are designed for: +- **Single Agent**: Most tasks (fast, direct) +- **Multi-Agent**: Complex planning scenarios (step-by-step adaptation) + +The key insight: Computer control requires **observation → action → verification** cycles, not blind execution of pre-planned steps. diff --git a/docs/Troubleshooting.md b/docs/Troubleshooting.md new file mode 100644 index 0000000..4486cf3 --- /dev/null +++ b/docs/Troubleshooting.md @@ -0,0 +1,218 @@ +--- +layout: default +title: Troubleshooting +--- + +# Troubleshooting Guide + +Common issues and their solutions for Recursive Control. + +## Installation Issues + +### Application Won't Start + +**Problem**: Double-clicking the executable does nothing or shows an error. + +**Solutions**: +1. Verify .NET Framework 4.8 is installed +2. Run as Administrator (right-click → Run as Administrator) +3. Check Windows Event Viewer for error details +4. Ensure antivirus isn't blocking the application + +### Missing Dependencies + +**Problem**: Error about missing DLL files. + +**Solutions**: +1. Install [.NET Framework 4.8](https://dotnet.microsoft.com/download/dotnet-framework/net48) +2. Install [Visual C++ Redistributables](https://support.microsoft.com/en-us/help/2977003/the-latest-supported-visual-c-downloads) +3. Reinstall the application + +## Configuration Issues + +### API Key Not Working + +**Problem**: "Invalid API key" or authentication errors. + +**Solutions**: +1. Verify the API key is copied correctly (no extra spaces) +2. Check that your API provider account is active +3. Ensure you have sufficient credits/quota +4. Verify you're using the correct endpoint URL (for Azure) + +### Can't Save Settings + +**Problem**: Settings don't persist after restart. + +**Solutions**: +1. Run the application as Administrator +2. Check that `%APPDATA%\FlowVision` folder has write permissions +3. Verify no antivirus is blocking file writes + +## Runtime Issues + +### Commands Not Executing + +**Problem**: AI responds but doesn't perform actions. + +**Solutions**: +1. Check that required plugins are enabled in settings +2. Verify Windows UAC isn't blocking automated actions +3. Ensure the target application/window is accessible +4. Try running Recursive Control as Administrator + +### Slow Performance + +**Problem**: Application is laggy or unresponsive. + +**Solutions**: +1. Close unnecessary applications to free RAM +2. Switch to a faster AI model (e.g., GPT-3.5 instead of GPT-4) +3. Disable unused plugins +4. Check internet connection speed +5. Consider using a local model with LM Studio + +### High Token Usage + +**Problem**: Burning through API credits quickly. + +**Solutions**: +1. Use more specific commands to reduce back-and-forth +2. Disable verbose logging in system prompts +3. Use smaller/cheaper models for simple tasks +4. Implement caching where possible + +## Plugin-Specific Issues + +### Screen Capture Not Working + +**Problem**: Screenshots are blank or fail to capture. + +**Solutions**: +1. Grant screen capture permissions in Windows settings +2. Check that display scaling is at 100% (or adjust DPI settings) +3. Verify graphics drivers are up to date +4. Try running as Administrator + +### Playwright/Browser Automation Failing + +**Problem**: Browser automation commands fail. + +**Solutions**: +1. Ensure Playwright is properly installed +2. Download required browser binaries +3. Check firewall isn't blocking browser processes +4. Verify Playwright plugin is enabled in settings + +### Keyboard/Mouse Input Not Working + +**Problem**: Automated keyboard/mouse actions don't execute. + +**Solutions**: +1. Run application as Administrator +2. Check that UAC isn't blocking input simulation +3. Verify target window has focus +4. Disable "Filter keyboard input" if enabled in accessibility settings + +### Remote Control Plugin Not Responding + +**Problem**: HTTP API not accepting commands. + +**Solutions**: +1. Verify plugin is enabled in settings +2. Check configured port isn't already in use +3. Ensure firewall allows incoming connections on that port +4. Test with curl: `curl -X POST http://localhost:PORT -d '{"command":"test"}'` + +## Error Messages + +### "Model not found" or "Deployment not found" + +**Cause**: Model name or deployment name is incorrect. + +**Solution**: Verify the exact model/deployment name in your AI provider dashboard and update settings. + +### "Rate limit exceeded" + +**Cause**: Too many API requests in a short time. + +**Solution**: Wait a few moments and try again. Consider upgrading your API plan for higher rate limits. + +### "Context length exceeded" + +**Cause**: Conversation history too long for the model. + +**Solution**: Start a new conversation or use a model with larger context window. + +### "Insufficient permissions" + +**Cause**: Application doesn't have required Windows permissions. + +**Solution**: Run as Administrator and check UAC settings. + +## Performance Optimization + +### Best Practices for Speed + +1. Use specific, clear commands +2. Enable only needed plugins +3. Use faster AI models for simple tasks +4. Keep conversation history manageable +5. Close resource-heavy applications + +### Memory Management + +- Restart application periodically for long sessions +- Clear conversation history when not needed +- Monitor Task Manager for memory leaks +- Close unnecessary browser tabs if using Playwright + +## Logging and Diagnostics + +### Enable Debug Logging + +1. Open settings +2. Enable "Debug Mode" or "Verbose Logging" +3. Reproduce the issue +4. Check logs in `%APPDATA%\FlowVision\logs` + +### Log Locations + +- **Plugin Usage**: `%APPDATA%\FlowVision\plugin_usage.log` +- **Application Logs**: `%APPDATA%\FlowVision\logs\` +- **Error Logs**: Windows Event Viewer → Application + +## Getting Help + +If you can't resolve your issue: + +1. **Check Documentation** + - [Installation Guide](Installation.html) + - [Getting Started](Getting-Started.html) + - [FAQ](FAQ.html) + +2. **Community Support** + - [Discord Server](https://discord.gg/mQWsWeHsVU) - Fast response from community + - [GitHub Discussions](https://github.com/flowdevs-io/Recursive-Control/discussions) - Q&A + +3. **Report Bugs** + - [GitHub Issues](https://github.com/flowdevs-io/Recursive-Control/issues) - Bug reports + - Include: OS version, .NET version, error messages, steps to reproduce + +## Known Issues + +### Windows 11 24H2 +- Some screen capture APIs may require additional permissions +- Workaround: Grant screen recording permission in Settings → Privacy + +### High DPI Displays +- UI elements may appear small on 4K displays +- Workaround: Adjust display scaling or DPI awareness settings + +### Antivirus False Positives +- Some antivirus software flags automation tools +- Workaround: Add Recursive Control to antivirus exclusions + +--- + +Still stuck? Reach out on [Discord](https://discord.gg/mQWsWeHsVU) - we're here to help! diff --git a/docs/UI-Features.md b/docs/UI-Features.md new file mode 100644 index 0000000..0527873 --- /dev/null +++ b/docs/UI-Features.md @@ -0,0 +1,609 @@ +# Novel UI Improvements for Recursive Control + +## Date: October 2, 2025 + +## Overview + +We've added **interactive, user-friendly UI enhancements** that make Recursive Control more powerful, transparent, and easier to troubleshoot. These improvements focus on giving users visibility into what's happening and making the system more engaging. + +--- + +## 🎁 **New Features** + +### 1. **Chat Export System** 📤 + +Export your conversations in multiple formats for debugging, sharing, or documentation. + +#### Features: +- **Export to JSON**: Machine-readable format with timestamps +- **Export to Markdown**: Human-readable format for documentation +- **Debug Export**: Includes chat + plugin usage logs for troubleshooting +- **Copy to Clipboard**: Quick copy for pasting elsewhere + +#### Access: +``` +File Menu → Export Chat → [Choose Format] +``` + +#### Formats: + +**JSON Export**: +```json +{ + "ExportTime": "2025-10-02 21:30:45", + "MessageCount": 15, + "Messages": [ + { + "Timestamp": "2025-10-02T21:25:10", + "Author": "You", + "Content": "Open Chrome" + }, + { + "Timestamp": "2025-10-02T21:25:12", + "Author": "AI", + "Content": "Chrome has been opened successfully" + } + ] +} +``` + +**Markdown Export**: +```markdown +# Chat Export - 2025-10-02 21:30:45 + +**Total Messages:** 15 + +--- + +## You +*2025-10-02T21:25:10* + +Open Chrome + +--- + +## AI +*2025-10-02T21:25:12* + +Chrome has been opened successfully + +--- +``` + +**Debug Export** (with Tool Calls): +```markdown +# Debugging Chat Export +**Export Time:** 2025-10-02 21:30:45 +**Total Messages:** 15 + +## Chat Messages + +### You - 2025-10-02T21:25:10 +``` +Open Chrome +``` + +### AI - 2025-10-02T21:25:12 +``` +Chrome has been opened successfully +``` + +--- + +## Plugin Usage Log + +``` +[21:25:10] WindowSelectionPlugin.ListWindowHandles +[21:25:11] ExecuteCommand: chrome.exe +[21:25:12] WindowSelectionPlugin.ForegroundSelect (12345678) +``` +``` + +#### Use Cases: +- **Debugging**: Export with tool calls to diagnose issues +- **Documentation**: Share workflows in markdown +- **Analysis**: Parse JSON exports programmatically +- **Support**: Send debug logs to support team +- **Training**: Create tutorials from actual interactions + +--- + +### 2. **Execution Visualizer** 🎯 + +Real-time visual display of step-by-step execution progress. + +#### Features: +- **Step-by-step display**: See each action as it happens +- **Status icons**: ⏳ Pending, ⚙️ In Progress, ✅ Completed, ❌ Failed +- **Progress bar**: Overall completion percentage +- **Color-coded steps**: Visual feedback for status +- **Auto-scroll**: Follows current step automatically + +#### Visual Layout: +``` +┌─────────────────────────────────────┐ +│ Execution Progress │ +│ Status: Step 3/10: Clicking element │ +│ ████████░░░░░░░░ 30% │ +├─────────────────────────────────────┤ +│ #1 ✅ Take screenshot │ +│ #2 ✅ Find window handle │ +│ #3 ⚙️ Click element (in progress) │ +│ #4 ⏳ Verify action │ +│ #5 ⏳ Continue workflow │ +└─────────────────────────────────────┘ +``` + +#### Color Scheme: +- **White/Gray**: Pending (not started) +- **Light Blue**: In Progress (currently executing) +- **Light Green**: Completed (success) +- **Light Red**: Failed (error occurred) +- **Light Gray**: Skipped (intentionally skipped) + +#### Benefits: +- **Transparency**: See exactly what the AI is doing +- **Confidence**: Visual feedback builds trust +- **Debugging**: Identify where failures occur +- **Learning**: Understand AI's problem-solving approach +- **Engagement**: Interactive feel vs black box + +--- + +### 3. **Activity Monitor** 📊 + +Real-time system status and activity logging. + +#### Features: +- **Status Indicators**: AI, ONNX, Browser states +- **Activity Log**: Color-coded event stream +- **Export Capability**: Save logs for analysis +- **Auto-scroll**: Always shows latest activity +- **Level Filtering**: Debug, Info, Success, Warning, Error + +#### Visual Layout: +``` +┌─────────────────────────────────────┐ +│ 🤖 AI: Processing (Blue) │ +│ 👁️ ONNX: Ready (Green) │ +│ 🌐 Browser: Active - Chrome (Green)│ +├─────────────────────────────────────┤ +│ [21:30:45] ℹ️ System: Started task │ +│ [21:30:46] ✅ ONNX: Screenshot OK │ +│ [21:30:47] ℹ️ Planner: Step 1/10 │ +│ [21:30:48] ⚠️ Warning: Slow resp. │ +│ [21:30:49] ✅ Success: Task done │ +└─────────────────────────────────────┘ +``` + +#### Icon Legend: +- 🔍 **Debug**: Detailed diagnostic info +- ℹ️ **Info**: General information +- ✅ **Success**: Positive outcome +- ⚠️ **Warning**: Potential issue +- ❌ **Error**: Failure or problem + +#### Benefits: +- **Awareness**: Know system state at a glance +- **Monitoring**: Watch AI activity in real-time +- **Diagnostics**: Track down performance issues +- **Documentation**: Export for issue reports +- **Transparency**: No hidden operations + +--- + +## 🎨 **UI Philosophy** + +### Interactive & Transparent +Users should **see** what's happening, not guess. Every action should have visual feedback. + +### Informative, Not Overwhelming +Show important information clearly, hide details until needed. Progressive disclosure. + +### Engaging Experience +Computer control should feel **interactive** and **responsive**, not robotic. + +### Debugging-Friendly +When things go wrong, users should have the tools to understand why. + +--- + +## 📋 **Implementation Details** + +### ChatExporter Class + +**Location**: `FlowVision/lib/Classes/ChatExporter.cs` + +**Methods**: +```csharp +// Export to JSON format +ChatExporter.ExportToJson(chatHistory); + +// Export to Markdown format +ChatExporter.ExportToMarkdown(chatHistory); + +// Export with plugin logs for debugging +ChatExporter.ExportWithToolCalls(chatHistory); + +// Quick copy to clipboard +ChatExporter.CopyToClipboard(chatHistory); +``` + +**Features**: +- Save file dialog with format-appropriate defaults +- Automatic filename with timestamp +- Error handling with user feedback +- Includes plugin usage logs in debug export + +--- + +### ExecutionVisualizer Component + +**Location**: `FlowVision/lib/Classes/UI/ExecutionVisualizer.cs` + +**Usage**: +```csharp +var visualizer = new ExecutionVisualizer(); + +// Start execution +visualizer.StartExecution(totalSteps: 10); + +// Add steps +visualizer.AddStep("Take screenshot"); +visualizer.AddStep("Click button"); + +// Update step status +visualizer.UpdateStep(0, StepStatus.InProgress); +visualizer.UpdateStep(0, StepStatus.Completed, "Screenshot captured"); + +// Complete +visualizer.CompleteExecution(success: true); +``` + +**Features**: +- Fluent API for easy integration +- Real-time visual updates +- Auto-scrolling to current step +- Color-coded status indicators +- Progress bar for overall completion + +--- + +### ActivityMonitor Component + +**Location**: `FlowVision/lib/Classes/UI/ActivityMonitor.cs` + +**Usage**: +```csharp +var monitor = new ActivityMonitor(); + +// Update system status +monitor.UpdateAIStatus("Processing", Color.Blue); +monitor.UpdateONNXStatus("Ready", Color.Green); +monitor.UpdateBrowserStatus("Active - Chrome", Color.Green); + +// Log activities +monitor.LogActivity("System", "Task started", ActivityLevel.Info); +monitor.LogActivity("ONNX", "Screenshot captured", ActivityLevel.Success); +monitor.LogActivity("Planner", "Step 1/10", ActivityLevel.Info); +monitor.LogActivity("Network", "Slow response", ActivityLevel.Warning); +monitor.LogActivity("Task", "Completed successfully", ActivityLevel.Success); + +// Export log +monitor.ExportLog(); +``` + +**Features**: +- Thread-safe updates +- Color-coded by severity +- Icon-based visual language +- Timestamp for each entry +- Export capability + +--- + +## 🚀 **Usage Examples** + +### Example 1: Debugging a Failed Task + +**Scenario**: User reports "AI clicked wrong button" + +**Steps**: +1. File → Export Chat → Export Debug Log +2. Open exported file +3. See exact sequence of actions +4. Find tool calls that executed +5. Identify incorrect window handle or coordinates +6. Fix and retest + +**Export Shows**: +``` +### AI - 21:30:47 +``` +Clicking element at coordinates [300, 250] +``` + +## Plugin Usage Log +``` +[21:30:47] MousePlugin.ClickOnWindow(12345678, [300, 250, 500, 310], true, 1) +[21:30:47] Result: Clicked successfully +``` +``` + +**Analysis**: Wrong window handle! Should have been 87654321 (different Chrome window). + +--- + +### Example 2: Monitoring Complex Workflow + +**Scenario**: 15-step automation task + +**Execution Visualizer Shows**: +``` +✅ Step 1/15: Screenshot captured +✅ Step 2/15: Window found (Chrome) +✅ Step 3/15: Brought to foreground +⚙️ Step 4/15: Typing search query (IN PROGRESS) +⏳ Step 5/15: Press Enter (PENDING) +⏳ Step 6/15: Wait for results (PENDING) +... +``` + +**Activity Monitor Shows**: +``` +[21:30:45] ℹ️ System: Starting 15-step workflow +[21:30:46] ✅ ONNX: Screenshot captured (640x480) +[21:30:47] ℹ️ Planner: Step 4/15 - Type query +[21:30:48] ⚙️ Keyboard: SendKeyToWindow(12345678, "Python tutorials") +``` + +**Benefits**: +- User sees progress in real-time +- Confidence that system is working +- Can identify if step is taking too long +- Visual confirmation of each action + +--- + +### Example 3: Sharing Workflow + +**Scenario**: User wants to document their automation + +**Steps**: +1. Complete automation task +2. File → Export Chat → Export to Markdown +3. Share markdown file +4. Others can see exact conversation and results + +**Result**: Clean, readable documentation of the workflow. + +--- + +## 💡 **Novel Features** + +### What Makes These Improvements Unique? + +#### 1. Debug Export with Tool Calls +**Novel**: Most chat apps only export conversations. We export the **actual tool calls** that were executed, making debugging trivial. + +**Impact**: Support teams can see exactly what the AI did, not just what it said. + +#### 2. Real-Time Execution Visualization +**Novel**: Not just a "loading" spinner—users see **each step** with status, icon, and color. + +**Impact**: Builds trust and understanding. Users learn how the AI solves problems. + +#### 3. Activity Monitor Integration +**Novel**: System status + activity log in one place with color-coded severity. + +**Impact**: Power users can monitor system health, casual users see reassuring status indicators. + +#### 4. Multi-Format Export +**Novel**: One feature, four export formats (JSON, Markdown, Debug, Clipboard) for different use cases. + +**Impact**: Flexibility for developers (JSON), documentation writers (Markdown), support (Debug), and quick sharing (Clipboard). + +--- + +## 🎯 **Future Enhancements** + +### Potential Additions + +**1. Element Highlighting**: +- Overlay on screenshots showing where AI will click +- Visual confirmation before execution +- Red outline = target, Green = success + +**2. Timeline View**: +- Horizontal timeline of all steps +- Click to see details of each step +- Duration visualization + +**3. Interactive Step Editing**: +- Pause execution +- Modify next step +- Resume with changes + +**4. Voice Feedback**: +- Optional audio cues for step completion +- "Step 5 complete" announcement +- Accessibility feature + +**5. Analytics Dashboard**: +- Success rate over time +- Most used features +- Average steps per task +- Performance metrics + +**6. Collaboration Features**: +- Share workflows with team +- Import exported workflows +- Template library + +--- + +## 📊 **Metrics** + +### Before UI Improvements: +- **Visibility**: Low (black box behavior) +- **Debugging**: Hard (no logs, no exports) +- **Engagement**: Passive (waiting for results) +- **Trust**: Uncertain (can't see what's happening) + +### After UI Improvements: +- **Visibility**: High (see every step) +- **Debugging**: Easy (export with tool calls) +- **Engagement**: Active (watch progress real-time) +- **Trust**: Strong (transparency builds confidence) + +--- + +## 🔧 **Developer Guide** + +### Adding to Your UI + +**Execution Visualizer**: +```csharp +// In your form +private ExecutionVisualizer visualizer; + +void InitializeVisualizer() +{ + visualizer = new ExecutionVisualizer + { + Dock = DockStyle.Right, + Width = 400 + }; + this.Controls.Add(visualizer); +} + +// During execution +visualizer.StartExecution(steps.Count); +foreach (var step in steps) +{ + visualizer.AddStep(step.Description); +} +``` + +**Activity Monitor**: +```csharp +// In your form +private ActivityMonitor monitor; + +void InitializeMonitor() +{ + monitor = new ActivityMonitor + { + Dock = DockStyle.Right, + Width = 300 + }; + this.Controls.Add(monitor); +} + +// Log activities +monitor.LogActivity("AI", "Task started", ActivityLevel.Info); +``` + +--- + +## ✅ **Testing Checklist** + +### Chat Export +- [ ] JSON export creates valid JSON file +- [ ] Markdown export is readable +- [ ] Debug export includes plugin logs +- [ ] Clipboard copy works +- [ ] Timestamps are correct +- [ ] Large chats export without errors + +### Execution Visualizer +- [ ] Steps appear in correct order +- [ ] Status updates work (Pending → InProgress → Completed) +- [ ] Progress bar updates correctly +- [ ] Auto-scroll follows current step +- [ ] Colors change based on status +- [ ] Failed steps show in red + +### Activity Monitor +- [ ] Status indicators update correctly +- [ ] Activity log shows timestamped entries +- [ ] Color coding works for all levels +- [ ] Export log creates valid file +- [ ] Thread-safe (no UI freezing) +- [ ] Icons display correctly + +--- + +## 📝 **User Documentation** + +### Quick Start: Exporting Chat + +1. Click **File** menu +2. Select **Export Chat** +3. Choose format: + - **JSON**: For developers/programmers + - **Markdown**: For documentation + - **Debug Log**: For troubleshooting + - **Clipboard**: For quick sharing +4. Select save location +5. Done! File is saved + +### Quick Start: Monitoring Execution + +1. Enable Multi-Agent Mode (for step-by-step execution) +2. Start a task +3. Watch the execution visualizer on the right +4. See each step complete with checkmarks +5. Progress bar shows overall completion + +### Quick Start: Activity Monitoring + +1. Open Activity Monitor panel +2. Watch real-time status updates +3. See color-coded activity log +4. Export log if needed for troubleshooting + +--- + +## 🎉 **Impact Summary** + +### What We Achieved: + +1. **Transparency**: Users can see exactly what's happening +2. **Debugability**: Easy to export and analyze +3. **Engagement**: Interactive, visual feedback +4. **Trust**: Builds confidence through visibility +5. **Professionalism**: Polished, modern UI experience + +### User Benefits: + +- ✅ Never wonder "is it working?" +- ✅ Debug issues yourself before asking for help +- ✅ Share workflows easily +- ✅ Learn how AI solves problems +- ✅ Feel in control, not helpless + +### Developer Benefits: + +- ✅ Easy to diagnose user issues +- ✅ Export format works with existing tools +- ✅ Clean component architecture +- ✅ Extensible for future features +- ✅ Well-documented APIs + +--- + +## 🚀 **Build Status** + +``` +✅ All UI components compile successfully +✅ Chat export integrated into File menu +✅ Execution visualizer ready to use +✅ Activity monitor ready to use +✅ No breaking changes +✅ Backward compatible +``` + +--- + +**These UI improvements transform Recursive Control from a functional tool into an engaging, transparent, and user-friendly platform. The focus on visibility, debugging, and interactivity makes it a joy to use!** 🎨✨ diff --git a/docs/UI-Redesign.md b/docs/UI-Redesign.md new file mode 100644 index 0000000..9a59c94 --- /dev/null +++ b/docs/UI-Redesign.md @@ -0,0 +1,570 @@ +# Modern UI Redesign - Novel & Intuitive Interface + +## Date: October 2, 2025 + +## Overview + +We've completely **redesigned the menu structure** to be modern, intuitive, and properly reflect the multi-agent architecture. The old confusing structure (LLM → Setup → Azure OpenAI) has been replaced with a logical, emoji-enhanced, feature-complete menu system. + +--- + +## ❌ **Old Menu Structure (Confusing)** + +``` +File +├─ Tools +└─ New Chat + +Vision +└─ OmniParser + +LLM ← Confusing! Only shows Azure? +└─ Setup + └─ Azure OpenAI ← Where are other models? + +Reason ← What does this even do? +``` + +### Problems: +- ❌ "LLM → Setup → Azure OpenAI" implies only Azure works +- ❌ No way to configure Planner or Coordinator agents +- ❌ No way to configure GitHub agent +- ❌ "Reason" menu item doesn't work +- ❌ No visibility into multi-agent mode +- ❌ No way to access new features (export, visualizers) +- ❌ Not intuitive - users had to guess + +--- + +## ✅ **New Menu Structure (Modern & Clear)** + +``` +📁 File +├─ 🔧 Tools +├─ 🆕 New Chat +└─ 📤 Export Chat + ├─ 📄 Export to JSON + ├─ 📝 Export to Markdown + ├─ 🐛 Export Debug Log (with Tools) + └─ 📋 Copy to Clipboard + +⚙️ Setup +├─ 🔧 Tools +├─ 🤖 AI Agents +│ ├─ ⚡ Actioner Agent (Primary) +│ ├─ 📋 Planner Agent +│ ├─ 🎯 Coordinator Agent +│ └─ 🐙 GitHub Agent +├─ 🔭 Vision Tools +│ └─ 📸 OmniParser Config +└─ 🔀 Multi-Agent Mode ✓ + +👁️ View +├─ 📊 Activity Monitor ✓ +└─ 🎯 Execution Visualizer ✓ + +❓ Help +├─ ℹ️ About +└─ 📚 Documentation +``` + +--- + +## 🎨 **Design Principles** + +### 1. **Emoji Visual Language** 🎨 +Every menu item has an emoji for instant recognition: +- 🤖 = AI Agents +- 🔧 = Configuration/Tools +- 📊 = Monitoring/Analytics +- 🎯 = Execution/Action +- 📤 = Export/Share +- ℹ️ = Information/Help + +**Why?** Faster visual scanning, more engaging, modern UI standards. + +### 2. **Logical Grouping** 📋 +Related items are grouped together: +- **File**: Document operations (new, export) +- **Setup**: Configuration (agents, tools, vision) +- **View**: UI panels (monitor, visualizer) +- **Help**: Information (about, docs) + +### 3. **Clear Hierarchy** 🌳 +Max 2-3 levels deep. No confusing nested menus. + +### 4. **Descriptive Labels** 📝 +"Actioner Agent (Primary)" tells you: +- What it is (Actioner Agent) +- Its role (Primary execution agent) + +### 5. **Checkboxes for Toggles** ✓ +Visual feedback for ON/OFF states: +- ✓ Multi-Agent Mode (enabled) +- ✓ Activity Monitor (visible) + +--- + +## 🆕 **New Features Exposed** + +### AI Agent Configuration + +**All 4 agents now accessible:** + +1. **⚡ Actioner Agent (Primary)** + - The main execution agent + - Handles single-agent mode + - Choose: Azure OpenAI, LM Studio, or GitHub Models + +2. **📋 Planner Agent** + - Plans step-by-step execution + - Used in multi-agent mode + - Separate model configuration + +3. **🎯 Coordinator Agent** + - User interface and routing + - Used in multi-agent mode + - Separate model configuration + +4. **🐙 GitHub Agent** + - Specialized for GitHub operations + - Independent configuration + - Can use GitHub Models free tier + +**Each opens the unified `AIProviderConfigForm` with:** +- Azure OpenAI (Cloud) +- LM Studio (Local) +- GitHub Models (Free Tier) + +--- + +### Multi-Agent Mode Toggle + +**Setup → 🔀 Multi-Agent Mode** (checkbox) + +- **Unchecked (OFF)**: Direct Actioner execution + - Fast, simple tasks + - Single AI agent + - Good for straightforward commands + +- **Checked (ON)**: Coordinator → Planner → Actioner workflow + - Complex, multi-step tasks + - Up to 25 steps + - Adaptive planning + - Better for workflows + +**Visual Feedback:** +When toggled, shows message in chat: +``` +System: Multi-Agent Mode enabled. Using Coordinator → Planner → +Actioner workflow with up to 25 steps. +``` + +--- + +### Export Chat Menu + +**File → 📤 Export Chat** + +4 export formats instantly accessible: +- **JSON**: Machine-readable, for analysis +- **Markdown**: Human-readable, for docs +- **Debug Log**: Includes tool calls, for troubleshooting +- **Clipboard**: Quick copy-paste + +No more hunting for export features! + +--- + +### View Menu (Future-Ready) + +**👁️ View** + +Toggleable UI panels: +- **📊 Activity Monitor**: Real-time system status +- **🎯 Execution Visualizer**: Step-by-step progress + +*Currently shows "coming soon" but infrastructure is ready* + +--- + +## 💡 **Novel Features** + +### 1. Per-Agent Configuration ⭐ + +**What's Novel:** Each agent (Actioner, Planner, Coordinator, GitHub) can use a **different AI provider**. + +**Example Configuration:** +``` +Actioner: Azure GPT-4 (powerful, expensive) +Planner: LM Studio Llama 3 (local, free) +Coordinator: GitHub Phi-4 (fast, free tier) +GitHub: GitHub Models (specialized) +``` + +**Why Novel:** Mix and match based on: +- **Cost**: Use free for simple, paid for complex +- **Latency**: Local for speed, cloud for power +- **Privacy**: Keep sensitive data local +- **Specialization**: Use best model for each role + +### 2. Visual Mode Indicator ⭐ + +**What's Novel:** Checkbox shows current execution mode at a glance. + +``` +✓ Multi-Agent Mode ← 3-agent workflow active + Multi-Agent Mode ← Single agent (direct) +``` + +**Why Novel:** Instant visibility into how your commands will execute. No guessing. + +### 3. Emoji Visual Language ⭐ + +**What's Novel:** Every menu item has a semantic emoji. + +**Why Novel:** +- Faster visual scanning +- Works across languages +- More engaging/modern +- Accessibility (visual cues) + +### 4. Unified Agent Config ⭐ + +**What's Novel:** One form configures all 3 providers (Azure, LM Studio, GitHub) for any agent. + +**Traditional Approach:** +- Separate form per provider +- Confusing which model is active +- Hard to switch + +**Our Approach:** +- Single unified form +- Dropdown to switch providers +- Clear visual indication +- Save/Test buttons + +--- + +## 🎯 **User Experience Improvements** + +### Before: +``` +User: "How do I configure the planner agent?" +Answer: "You can't from the UI, edit config files manually" + +User: "Can I use LM Studio for the coordinator?" +Answer: "Yes but you need to edit JSON" + +User: "How do I enable multi-agent mode?" +Answer: "Tools → Enable Multi-Agent checkbox" + +User: "How do I export chat for debugging?" +Answer: "You can't, check the log files" +``` + +### After: +``` +User: "How do I configure the planner agent?" +Answer: "Setup → AI Agents → Planner Agent" + +User: "Can I use LM Studio for the coordinator?" +Answer: "Setup → AI Agents → Coordinator Agent → + Choose 'LM Studio (Local)'" + +User: "How do I enable multi-agent mode?" +Answer: "Setup → Multi-Agent Mode (click checkbox)" + +User: "How do I export chat for debugging?" +Answer: "File → Export Chat → Export Debug Log" +``` + +**Everything is discoverable!** + +--- + +## 📊 **Menu Structure Details** + +### File Menu +``` +📁 File +├─ 🔧 Tools (Configure plugins) +├─ 🆕 New Chat (Clear conversation) +└─ 📤 Export Chat + ├─ 📄 Export to JSON + ├─ 📝 Export to Markdown + ├─ 🐛 Export Debug Log (with Tools) + └─ 📋 Copy to Clipboard +``` + +**Purpose**: Document/conversation operations + +--- + +### Setup Menu +``` +⚙️ Setup +├─ 🔧 Tools (Plugin configuration) +├─ 🤖 AI Agents +│ ├─ ⚡ Actioner Agent (Primary) +│ ├─ 📋 Planner Agent +│ ├─ 🎯 Coordinator Agent +│ └─ 🐙 GitHub Agent +├─ 🔭 Vision Tools +│ └─ 📸 OmniParser Config +└─ 🔀 Multi-Agent Mode ✓ +``` + +**Purpose**: System configuration + +**AI Agents submenu** - Each opens AIProviderConfigForm: +- Agent name in title +- All 3 providers available +- Independent configuration per agent + +**Multi-Agent Mode** - Toggle with instant feedback: +- Checkbox shows current state +- Click to toggle +- System message confirms change +- Explains what mode does + +--- + +### View Menu +``` +👁️ View +├─ 📊 Activity Monitor ✓ +└─ 🎯 Execution Visualizer ✓ +``` + +**Purpose**: Toggle UI panels + +**Activity Monitor**: +- Real-time system status +- AI/ONNX/Browser states +- Color-coded activity log +- Export capability + +**Execution Visualizer**: +- Step-by-step progress +- Status icons per step +- Progress bar +- Auto-scroll + +*Currently placeholder, full integration coming* + +--- + +### Help Menu +``` +❓ Help +├─ ℹ️ About +└─ 📚 Documentation +``` + +**Purpose**: Information and help + +**About**: +- Version information +- Feature list +- GitHub link +- Quick reference + +**Documentation**: +- Opens GitHub Wiki +- Comprehensive guides +- API documentation +- Examples + +--- + +## 🔧 **Technical Implementation** + +### Menu Structure +```csharp +// Old way (limited) +LLM → Setup → Azure OpenAI + +// New way (comprehensive) +Setup → AI Agents → [Choose Agent] → [Configure Any Provider] +``` + +### Event Handlers + +**Agent Configuration:** +```csharp +private void actionerAgentToolStripMenuItem_Click(object sender, EventArgs e) +{ + AIProviderConfigForm configForm = new AIProviderConfigForm("actioner"); + configForm.ShowDialog(); +} +``` + +**Multi-Agent Toggle:** +```csharp +private void multiAgentModeToolStripMenuItem_Click(object sender, EventArgs e) +{ + var toolConfig = ToolConfig.LoadConfig("toolsconfig"); + toolConfig.EnableMultiAgentMode = multiAgentModeToolStripMenuItem.Checked; + toolConfig.SaveConfig("toolsconfig"); + + AddMessage("System", $"Multi-Agent Mode {status}..."); +} +``` + +**State Persistence:** +```csharp +// On Form Load +var toolConfig = ToolConfig.LoadConfig("toolsconfig"); +multiAgentModeToolStripMenuItem.Checked = toolConfig.EnableMultiAgentMode; +``` + +--- + +## 🎯 **Benefits** + +### For Users +- ✅ **Discoverable**: All features visible in menus +- ✅ **Intuitive**: Logical grouping and clear labels +- ✅ **Visual**: Emojis provide instant recognition +- ✅ **Flexible**: Configure each agent independently +- ✅ **Transparent**: See current mode at a glance + +### For Support +- ✅ **Easy to Guide**: "Go to Setup → AI Agents → Actioner" +- ✅ **Clear State**: Checkboxes show current configuration +- ✅ **Export Tools**: Users can send debug logs easily +- ✅ **Less Confusion**: No more "where do I configure X?" + +### For Developers +- ✅ **Extensible**: Easy to add new menu items +- ✅ **Consistent**: All agents use same config form +- ✅ **Maintainable**: Clear hierarchy and naming +- ✅ **Future-Ready**: View menu ready for new panels + +--- + +## 📋 **Migration Guide** + +### Old → New Mapping + +| Old Location | New Location | +|-------------|--------------| +| LLM → Setup → Azure OpenAI | Setup → AI Agents → Actioner Agent | +| *(No way to config planner)* | Setup → AI Agents → Planner Agent | +| *(No way to config coordinator)* | Setup → AI Agents → Coordinator Agent | +| Vision → OmniParser | Setup → Vision Tools → OmniParser Config | +| Tools → *(checkbox)* | Setup → Multi-Agent Mode | +| *(No export)* | File → Export Chat → [4 formats] | + +--- + +## 🚀 **Future Enhancements** + +### Planned Features + +1. **Quick Config Panel** + - Floating panel with most-used settings + - One-click agent switching + - Live status indicators + +2. **Visual Agent Pipeline** + - Diagram showing: User → Coordinator → Planner → Actioner + - Highlight active agent + - Show which model each uses + +3. **Preset Configurations** + - Save/Load entire configurations + - "Power User" preset (all cloud) + - "Privacy" preset (all local) + - "Budget" preset (all free) + +4. **Smart Suggestions** + - "This task works better with multi-agent mode" + - "Your planner agent is slower than actioner" + - "Consider using local model for privacy" + +5. **Model Performance Metrics** + - Response times per agent + - Token usage tracking + - Cost estimation + - Success rates + +--- + +## ✅ **Build Status** + +``` +✅ New menu structure: Implemented +✅ All 4 agents: Accessible +✅ Multi-agent toggle: Working +✅ Export menu: Functional +✅ View menu: Prepared (placeholder) +✅ Help menu: Functional +✅ State persistence: Working +✅ Emoji support: Rendering correctly +✅ Compilation: 0 errors +✅ No breaking changes +``` + +--- + +## 📸 **Visual Examples** + +### Menu Structure +``` +┌──────────────────────────────────┐ +│ 📁 File ⚙️ Setup 👁️ View ❓ Help │ +└──────────────────────────────────┘ + │ + ├─ 🔧 Tools + ├─ 🆕 New Chat + └─ 📤 Export Chat ───┐ + ├─ 📄 Export to JSON + ├─ 📝 Export to Markdown + ├─ 🐛 Export Debug Log + └─ 📋 Copy to Clipboard +``` + +### Agent Configuration +``` +Setup → 🤖 AI Agents ───┐ + ├─ ⚡ Actioner Agent (Primary) + ├─ 📋 Planner Agent + ├─ 🎯 Coordinator Agent + └─ 🐙 GitHub Agent +``` + +### Mode Indication +``` +Setup +├─ ... other items ... +└─ 🔀 Multi-Agent Mode ✓ ← Currently enabled +``` + +--- + +## 🎉 **Summary** + +### What Changed +- ❌ Removed confusing "LLM" and "Reason" menus +- ✅ Added comprehensive "Setup" menu +- ✅ Added all 4 AI agents to menu +- ✅ Added multi-agent mode toggle +- ✅ Added export capabilities +- ✅ Added view menu for future panels +- ✅ Added help menu +- ✅ Enhanced with emoji visual language + +### Impact +**Before**: Confusing, limited, users had to edit config files +**After**: Intuitive, comprehensive, everything discoverable from UI + +### Novel Aspects +1. Per-agent model configuration (mix and match) +2. Visual mode indicator (checkbox) +3. Emoji-enhanced menu system +4. Unified configuration form +5. 4-format export system + +**The UI is now modern, intuitive, and properly reflects the powerful multi-agent architecture underneath!** 🎨✨ diff --git a/docs/Workflow-Builder-Tutorial.md b/docs/Workflow-Builder-Tutorial.md new file mode 100644 index 0000000..cae4ce9 --- /dev/null +++ b/docs/Workflow-Builder-Tutorial.md @@ -0,0 +1,104 @@ +--- +layout: default +title: Workflow Builder Tutorial +--- + +# Workflow Builder Tutorial: Gamified Learning Adventures + +Level up your Recursive Control skills by building interactive workflows! This tutorial turns learning into a game: complete "quests" by crafting prompt chains that automate real tasks. Each quest includes objectives, step-by-step guidance, and verification challenges. + +Think of this as a simulator – test your prompts here before running them in the app. Earn "badges" by successfully completing each workflow (self-assessed via expected outcomes). + +## Quest 1: GitHub Issue Automator (Beginner Level) +**Objective**: Automate creating a GitHub issue in your repo using browser automation and keyboard inputs. + +**Badge**: Issue Master + +**Step-by-Step Prompt Chain**: +1. **Launch the Browser**: "Open Chrome and navigate to github.com/login." + - *Expected*: Browser opens to GitHub login page. (Verify: Describe the screen to confirm.) + +2. **Login**: "Focus on the username field and type 'yourusername', then tab to password and type 'yourpassword', then press enter." + - *Tip*: Use KeyboardPlugin for targeted input. (Challenge: Add verification – "If login fails, notify me.") + +3. **Navigate to Repo**: "Go to github.com/yourusername/your-repo/issues." + - *Expected*: Issues page loads. + +4. **Create Issue**: "Click the 'New issue' button, type 'Bug: App crashes on load' in title, add description 'Steps to reproduce: 1. Open app. 2. Click button.', then submit." + - *Challenge*: Use ScreenCapture to identify the button's bounding box before clicking. + +**Full Chain Prompt** (Copy-Paste Ready): +``` +Perform these steps to create a GitHub issue: +1. Open Chrome and go to github.com/login. +2. Log in with username 'yourusername' and password 'yourpassword'. +3. Navigate to github.com/yourusername/your-repo/issues. +4. Click 'New issue', fill title 'Bug: App crashes', description 'Steps: Open app, click button', and submit. +Verify each step with a screenshot description. +``` + +**Verification Quest**: Run this in Recursive Control. Did it create the issue? If not, refine the prompt (e.g., handle 2FA). + +## Quest 2: Daily Report Generator (Intermediate Level) +**Objective**: Automate opening Excel, filling data, and saving a report. + +**Badge**: Report Wizard + +**Step-by-Step Prompt Chain**: +1. **Open App**: "Launch Excel and create a new spreadsheet." +2. **Add Headers**: "Type 'Date' in A1, 'Task' in B1, 'Status' in C1." +3. **Fill Data**: "In A2 type today's date, B2 'Implement feature X', C2 'Completed'." +4. **Save**: "Save as 'daily-report.xlsx' in Documents." + +**Full Chain Prompt**: +``` +Build a daily report in Excel: +1. Open Excel, new file. +2. Headers: A1=Date, B1=Task, C1=Status. +3. Data: A2=today's date, B2=Review code, C2=Done. +4. Save to Documents as daily-report.xlsx. +Confirm each cell after typing. +``` + +**Challenge**: Add conditional logic – "If file exists, append instead of overwrite." + +## Quest 3: File Organizer Bot (Advanced Level) +**Objective**: Scan Downloads folder, organize files by type using CMD/PowerShell. + +**Badge**: Organization Overlord + +**Step-by-Step Prompt Chain**: +1. **Scan Folder**: "Use CMD to list files in Downloads." +2. **Create Folders**: "Make directories: Images, Documents, Others." +3. **Move Files**: "Move .jpg to Images, .pdf to Documents, others to Others." +4. **Verify**: "List contents of each new folder." + +**Full Chain Prompt**: +``` +Organize Downloads: +1. CMD: dir %USERPROFILE%\Downloads +2. Create folders: mkdir Images Documents Others +3. Move: move *.jpg Images, move *.pdf Documents, move *.* Others +4. Verify: dir Images, dir Documents, dir Others +Handle errors if folders exist. +``` + +**Epic Challenge**: Integrate vision – "Screenshot Downloads folder and describe file icons before moving." + +## Level Up Tips +- **Gamification Hack**: Track your success rate. Aim for 100% on 5 quests to "unlock" custom workflow creation. +- **Combine Quests**: Chain them, e.g., "Generate report, then create GitHub issue about it." +- **Debug Mode**: If a step fails, add "Describe the screen and suggest fixes" to your prompts. + +## Go Interactive: Jupyter Notebook Simulator +For hands-on practice, download our [Interactive Workflow Builder Notebook](tutorials/Workflow-Builder-Notebook.ipynb). It includes widgets to build and simulate prompt chains without running the full app! + +**How to Use**: +1. Install Jupyter: `pip install notebook` +2. Download the .ipynb file. +3. Run `jupyter notebook` and open the file. +4. Interact with widgets to test prompts in real-time. + +Completed all quests? Share your custom workflows on Discord! + +Back to [Getting Started](Getting-Started.html) diff --git a/docs/_config.yml b/docs/_config.yml new file mode 100644 index 0000000..3da82ac --- /dev/null +++ b/docs/_config.yml @@ -0,0 +1,30 @@ +# GitHub Pages Configuration for Recursive Control + +# Site settings +title: Recursive Control Documentation +description: AI-Powered Computer Control for Windows +baseurl: "/Recursive-Control" +url: "https://flowdevs-io.github.io" + +theme: null + +# Plugins (GitHub Pages whitelist) +plugins: + - jekyll-remote-theme + - jekyll-seo-tag + - jekyll-sitemap + +# Sass pipeline +sass: + sass_dir: _sass + style: compressed + +# Markdown settings +markdown: kramdown +kramdown: + input: GFM + hard_wrap: false + syntax_highlighter: rouge + +# Show downloads button +show_downloads: false diff --git a/docs/_data/navigation.yml b/docs/_data/navigation.yml new file mode 100644 index 0000000..a374e14 --- /dev/null +++ b/docs/_data/navigation.yml @@ -0,0 +1,74 @@ +# Navigation map for the Recursive Control docs portal +# Each group defines a lane in the Compass navigation and command palette + +- title: Mission Control + icon: ✨ + links: + - label: Home + path: / + description: "Orientation deck, latest signal boosts, and portal jump points" + - label: Installation Capsule + path: /Installation.html + description: "Everything required to land Recursive Control on Windows" + - label: First Flight Checklist + path: /Getting-Started.html + description: "Spin up your agents and complete your first automations" + +- title: Ops Dashboard + icon: 🧭 + links: + - label: UI Features Atlas + path: /UI-Features.html + description: "Every panel, overlay, and feedback loop mapped" + - label: UI Redesign Narrative + path: /UI-Redesign.html + description: "Evolution of the command cockpit and design rituals" + - label: Release Trail Journal + path: /Blog-Post-v2.0.html + description: "Product story from MVP experiments to AI copilots" + +- title: Systems Lab + icon: ⚙️ + links: + - label: Multi-Agent Architecture + path: /Multi-Agent-Architecture.html + description: "Planner, coordinator, executor – the triad that flies your missions" + - label: System Prompts Reference + path: /System-Prompts-Reference.html + description: "Tune personas, guardrails, and playbooks for each agent" + - label: OmniParser Setup + path: /guides/OmniParser-Setup.html + description: "Wire up vision parsing to unlock spatial reasoning" + - label: AI Provider Comparison + path: /guides/AI-Provider-Comparison.html + description: "Latency, cost, and capability matrix across model vendors" + - label: LM Studio Quickstart + path: /guides/LM-Studio-Quickstart.html + description: "Local-first deployments powered by LM Studio" + +- title: Extensible Orbit + icon: 🧩 + links: + - label: API Reference + path: /API-Reference.html + description: "REST endpoints, payloads, and automation recipes" + - label: Plugin Architecture Primer + path: /Multi-Agent-Architecture.html#plugin-ecosystem + description: "Where plugins dock into the agent orchestration" + - label: UI Telemetry Hooks + path: /UI-Features.html#telemetry-hooks + description: "Signal taps for real-time dashboards and observers" + +- title: Support Bay + icon: 🛟 + links: + - label: FAQ Loop + path: /FAQ.html + description: "Quick responses to the most common mission questions" + - label: Troubleshooting Console + path: /Troubleshooting.html + description: "On-call runbooks for recovering from turbulence" + - label: Repo Handbook + path: /README.html + description: "Contribution guidelines, strategy, and governance notes" + diff --git a/docs/_includes/command-palette.html b/docs/_includes/command-palette.html new file mode 100644 index 0000000..908eec4 --- /dev/null +++ b/docs/_includes/command-palette.html @@ -0,0 +1,20 @@ + + + + + Quick Navigation + Type to search or jump with ↑ ↓ ↵ · Press Esc to close + + + + Press Enter to open selection + + + + + + diff --git a/docs/_layouts/default.html b/docs/_layouts/default.html new file mode 100644 index 0000000..a71585c --- /dev/null +++ b/docs/_layouts/default.html @@ -0,0 +1,153 @@ + + + {% assign nav = site.data.navigation %} + + + + + + + {% if page.title %}{{ page.title }} · {% endif %}{{ site.title }} + {%- seo -%} + + + + + + + + + + + + + Command Palette + CtrlK + + + {% include command-palette.html %} + + + + + + + + {{ site.description }} + + + + {{ page.title | default: 'Recursive Control Docs' }} + {% if page.subtitle %} + {{ page.subtitle }} + {% endif %} + + + Install + Quickstart + Releases + + + + + + + {{ content }} + + + + + + + + + + + diff --git a/docs/_sass/compass.scss b/docs/_sass/compass.scss new file mode 100644 index 0000000..b058ce3 --- /dev/null +++ b/docs/_sass/compass.scss @@ -0,0 +1,894 @@ +$space-base: 1rem; +$color-bg: #07070a; +$color-surface: rgba(18, 20, 25, 0.82); +$color-surface-alt: rgba(12, 14, 19, 0.95); +$color-accent: #8f6bff; +$color-accent-soft: rgba(143, 107, 255, 0.2); +$color-accent-strong: #7a5cff; +$color-text: #f4f4f6; +$color-text-dim: rgba(244, 244, 246, 0.72); +$color-border: rgba(255, 255, 255, 0.08); +$color-positive: #6bffa7; +$transition: 180ms ease; +$font-display: "Space Grotesk", "Segoe UI", sans-serif; + +* { + box-sizing: border-box; +} + +html, +body { + padding: 0; + margin: 0; + font-family: $font-display; + background: $color-bg; + color: $color-text; + min-height: 100%; +} + +body::before { + content: ""; + position: fixed; + inset: 0; + background: radial-gradient(circle at top right, rgba(143, 107, 255, 0.15), transparent 55%), + radial-gradient(circle at bottom left, rgba(107, 255, 197, 0.08), transparent 50%); + pointer-events: none; + z-index: -2; +} + +.gradient-spark { + position: fixed; + filter: blur(90px); + opacity: 0.32; + z-index: -1; + width: 480px; + height: 480px; + border-radius: 50%; + background: radial-gradient(circle, rgba(143, 107, 255, 0.55), rgba(143, 107, 255, 0)); + top: -120px; + right: -120px; + + &--two { + top: auto; + bottom: -160px; + left: -140px; + background: radial-gradient(circle, rgba(107, 255, 197, 0.45), rgba(107, 255, 197, 0)); + } +} + +a { + color: $color-text; + text-decoration: none; +} + +a:hover { + color: $color-positive; +} + +.viewport-frame { + display: grid; + grid-template-columns: 340px 1fr; + min-height: 100vh; + align-items: start; +} + +.nav-compass { + position: sticky; + top: 0; + height: 100vh; + padding: $space-base * 1.5; + backdrop-filter: blur(18px); + background: rgba(6, 7, 10, 0.6); + border-right: 1px solid $color-border; + display: flex; + flex-direction: column; +} + +.nav-compass__header { + display: flex; + align-items: center; + justify-content: space-between; + gap: $space-base; + padding-bottom: $space-base; + border-bottom: 1px solid $color-border; +} + +.brand-mark { + display: flex; + gap: $space-base * 0.75; + align-items: center; + padding: $space-base * 0.5 $space-base * 0.75; + border-radius: 999px; + background: rgba(255, 255, 255, 0.08); + transition: background $transition, transform $transition; + + &:hover { + background: rgba(255, 255, 255, 0.14); + transform: translateY(-1px); + } +} + +.brand-mark__orb { + width: 22px; + height: 22px; + border-radius: 50%; + background: conic-gradient(from 180deg, rgba(143, 107, 255, 1) 0%, rgba(107, 255, 197, 1) 45%, rgba(255, 255, 255, 0.45) 100%); + box-shadow: 0 0 14px rgba(143, 107, 255, 0.55); +} + +.brand-mark__title { + font-size: 0.95rem; + font-weight: 600; +} + +.brand-mark__subtitle { + font-size: 0.72rem; + color: $color-text-dim; +} + +.nav-controls { + display: flex; + gap: $space-base * 0.5; +} + +.nav-toggle, +.theme-toggle { + background: rgba(255, 255, 255, 0.08); + border: 1px solid transparent; + border-radius: 999px; + padding: $space-base * 0.4 $space-base * 0.7; + color: $color-text; + display: inline-flex; + align-items: center; + gap: $space-base * 0.4; + cursor: pointer; + transition: border-color $transition, background $transition; + + &:hover { + border-color: rgba(255, 255, 255, 0.24); + } +} + +.nav-toggle__icon, +.theme-toggle__icon { + width: 16px; + height: 16px; + position: relative; +} + +.nav-toggle__icon::before, +.nav-toggle__icon::after, +.theme-toggle__icon::before, +.theme-toggle__icon::after { + content: ""; + position: absolute; + inset: 0; + border-radius: 50%; + background: linear-gradient(135deg, rgba(143, 107, 255, 0.9), rgba(107, 255, 197, 0.9)); + opacity: 0.7; +} + +.theme-toggle__icon::after { + filter: blur(6px); + opacity: 0.4; +} + +.theme-toggle__label { + font-size: 0.85rem; +} + +.nav-compass__scroll { + margin-top: $space-base * 1.5; + overflow-y: auto; + padding-right: $space-base * 0.5; +} + +.nav-stacks { + display: grid; + gap: $space-base; +} + +.nav-stack { + border: 1px solid $color-border; + background: $color-surface; + border-radius: 24px; + padding: $space-base; + box-shadow: 0 12px 32px rgba(7, 7, 10, 0.45); + transition: transform $transition, border-color $transition, box-shadow 240ms ease; + + &[data-active="true"] { + transform: translateY(-2px); + border-color: rgba(143, 107, 255, 0.5); + box-shadow: 0 16px 40px rgba(143, 107, 255, 0.25); + } +} + +.nav-stack__header { + display: flex; + gap: $space-base * 0.75; + align-items: center; + margin-bottom: $space-base * 0.75; +} + +.nav-stack__icon { + display: inline-flex; + align-items: center; + justify-content: center; + width: 40px; + height: 40px; + border-radius: 14px; + background: $color-accent-soft; + font-size: 1.2rem; +} + +.nav-stack__links { + list-style: none; + padding: 0; + margin: 0; + display: grid; + gap: $space-base * 0.4; +} + +.nav-stack__links li { + margin: 0; +} + +[data-nav-link] { + display: flex; + align-items: center; + justify-content: space-between; + gap: $space-base * 0.75; + padding: $space-base * 0.75; + border-radius: 16px; + border: 1px solid transparent; + background: rgba(255, 255, 255, 0.04); + transition: border-color $transition, background $transition, transform $transition; + + &:hover, + &:focus { + border-color: rgba(143, 107, 255, 0.45); + background: rgba(143, 107, 255, 0.12); + transform: translateX(4px); + } + + &.is-active { + border-color: rgba(143, 107, 255, 0.75); + background: rgba(143, 107, 255, 0.18); + box-shadow: inset 0 0 0 1px rgba(143, 107, 255, 0.4); + } +} + +.nav-link__label { + font-size: 0.95rem; + font-weight: 500; +} + +.nav-link__chevron { + font-size: 1.1rem; + opacity: 0.55; +} + +.nav-empty { + color: $color-text-dim; + font-size: 0.9rem; +} + +.nav-compass__footer { + margin-top: auto; + border-top: 1px solid $color-border; + padding-top: $space-base; + display: grid; + gap: $space-base * 0.5; +} + +.nav-footer-link { + display: flex; + justify-content: space-between; + align-items: center; + gap: $space-base * 0.5; + padding: $space-base * 0.65 $space-base * 0.85; + border-radius: 12px; + background: rgba(255, 255, 255, 0.05); + transition: background $transition, transform $transition; + + span:last-child { + opacity: 0.45; + } + + &:hover { + background: rgba(255, 255, 255, 0.1); + transform: translateY(-1px); + } +} + +.content-orbit { + display: flex; + flex-direction: column; + min-height: 100vh; + padding: $space-base * 1.5 $space-base * 2; +} + +.page-masthead { + display: grid; + grid-template-columns: 1fr auto; + gap: $space-base; + padding: $space-base * 1.5; + border-radius: 32px; + border: 1px solid $color-border; + background: $color-surface-alt; + box-shadow: 0 24px 50px rgba(7, 7, 10, 0.55); + margin-bottom: $space-base * 1.5; + position: relative; + overflow: hidden; +} + +.page-masthead::after { + content: ""; + position: absolute; + inset: 0; + background: radial-gradient(circle at top left, rgba(143, 107, 255, 0.25), transparent 55%); + mix-blend-mode: screen; +} + +.masthead__meta { + display: flex; + align-items: center; + gap: $space-base * 0.75; +} + +.masthead__eyebrow { + font-size: 0.8rem; + text-transform: uppercase; + letter-spacing: 0.2em; + color: $color-text-dim; +} + +.masthead__pulse { + width: 14px; + height: 14px; + border-radius: 50%; + background: $color-positive; + box-shadow: 0 0 0 0 rgba(107, 255, 167, 0.65); + animation: pulse 2.8s ease-out infinite; +} + +.masthead__titles h1 { + margin: 0; + font-size: 2.4rem; + font-weight: 600; + letter-spacing: -0.02em; +} + +.masthead__subtitle { + color: $color-text-dim; + font-size: 1.05rem; + max-width: 540px; + margin-top: $space-base * 0.5; +} + +.masthead__actions { + display: flex; + align-items: center; + gap: $space-base; +} + +.masthead__action { + background: $color-accent; + border-radius: 999px; + padding: $space-base * 0.75 $space-base * 1.4; + font-weight: 600; + letter-spacing: 0.01em; + color: #0b0415; + transition: transform $transition, box-shadow $transition; + + &:hover { + transform: translateY(-1px); + box-shadow: 0 12px 24px rgba(143, 107, 255, 0.35); + } + + &--secondary { + background: rgba(255, 255, 255, 0.12); + color: $color-text; + } + + &--ghost { + background: transparent; + border: 1px solid rgba(255, 255, 255, 0.28); + color: $color-text; + + &:hover { + border-color: rgba(255, 255, 255, 0.5); + } + } +} + +.reading-lane { + display: grid; + grid-template-columns: 280px minmax(0, 1fr); + gap: $space-base * 1.5; + align-items: start; +} + +.reading-lane__rail { + display: grid; + gap: $space-base; + position: sticky; + top: $space-base * 1.5; + align-self: start; +} + +.rail-card { + border-radius: 24px; + border: 1px solid $color-border; + background: $color-surface; + padding: $space-base * 1.1; + display: grid; + gap: $space-base * 0.6; + min-height: 140px; + box-shadow: 0 12px 30px rgba(7, 7, 10, 0.45); +} + +.rail-card--context { + position: relative; +} + +.rail-card--context::before { + content: ""; + position: absolute; + inset: -1px; + border-radius: 24px; + border: 1px solid rgba(143, 107, 255, 0.22); + pointer-events: none; +} + +.rail-card__title { + font-weight: 600; + font-size: 0.9rem; + letter-spacing: 0.08em; + text-transform: uppercase; + color: $color-text-dim; +} + +.rail-card__body { + font-size: 0.95rem; + color: $color-text; +} + +.rail-card--toc { + padding: 0; + overflow: hidden; +} + +.rail-card--toc nav { + display: block; + max-height: 540px; + overflow: hidden auto; +} + +.rail-card--toc ul { + list-style: none; + margin: 0; + padding: $space-base; + display: grid; + gap: $space-base * 0.5; +} + +.rail-card--toc a { + display: flex; + align-items: center; + gap: $space-base * 0.6; + padding: $space-base * 0.5 $space-base * 0.75; + border-radius: 14px; + border: 1px solid transparent; + background: transparent; + color: $color-text-dim; + font-size: 0.9rem; + + &:hover, + &:focus, + &.is-active { + border-color: rgba(143, 107, 255, 0.35); + background: rgba(143, 107, 255, 0.1); + color: $color-text; + } +} + +.doc-canvas { + border-radius: 32px; + border: 1px solid $color-border; + background: rgba(10, 12, 18, 0.78); + backdrop-filter: blur(12px); + padding: $space-base * 2; + box-shadow: 0 24px 60px rgba(7, 7, 10, 0.65); +} + +.doc-canvas h1, +.doc-canvas h2, +.doc-canvas h3, +.doc-canvas h4 { + color: $color-text; +} + +.doc-canvas p, +.doc-canvas li { + color: $color-text-dim; + line-height: 1.65; +} + +.doc-canvas pre { + background: rgba(7, 7, 10, 0.9); + border-radius: 18px; + padding: $space-base * 1.2; + border: 1px solid rgba(255, 255, 255, 0.08); + box-shadow: inset 0 0 0 1px rgba(255, 255, 255, 0.04); +} + +.doc-canvas code { + font-size: 0.9rem; + color: $color-positive; +} + +.doc-canvas table { + width: 100%; + border-collapse: collapse; + margin: $space-base 0; +} + +.doc-canvas th, +.doc-canvas td { + border: 1px solid rgba(255, 255, 255, 0.05); + padding: $space-base * 0.75; + text-align: left; +} + +.page-footer { + margin-top: $space-base * 2; + display: flex; + justify-content: space-between; + gap: $space-base; + font-size: 0.92rem; + color: $color-text-dim; +} + +.page-footer__left, +.page-footer__right { + display: inline-flex; + align-items: center; + gap: $space-base * 0.5; +} + +.page-footer a { + color: $color-text; + text-decoration: underline; + text-decoration-color: rgba(255, 255, 255, 0.3); + + &:hover { + text-decoration-color: rgba(255, 255, 255, 0.6); + } +} + +.command-launch { + position: fixed; + bottom: $space-base * 1.5; + right: $space-base * 1.5; + background: rgba(143, 107, 255, 0.18); + border: 1px solid rgba(143, 107, 255, 0.45); + color: $color-text; + border-radius: 999px; + padding: $space-base * 0.75 $space-base * 1.3; + display: flex; + align-items: center; + gap: $space-base * 0.6; + font-weight: 600; + backdrop-filter: blur(18px); + cursor: pointer; + transition: transform $transition, box-shadow $transition, background $transition; + z-index: 12; + + &:hover { + transform: translateY(-2px); + background: rgba(143, 107, 255, 0.3); + box-shadow: 0 12px 30px rgba(143, 107, 255, 0.45); + } + + kbd { + background: rgba(255, 255, 255, 0.12); + border-radius: 6px; + padding: 2px 6px; + font-size: 0.78rem; + } +} + +.command-palette { + position: fixed; + inset: 0; + display: none; + align-items: center; + justify-content: center; + z-index: 15; +} + +.command-palette[aria-hidden="false"] { + display: flex; +} + +.command-palette__backdrop { + position: absolute; + inset: 0; + background: rgba(4, 5, 10, 0.72); + backdrop-filter: blur(16px); +} + +.command-palette__panel { + position: relative; + width: min(720px, 90vw); + border-radius: 28px; + padding: $space-base * 1.6; + border: 1px solid rgba(255, 255, 255, 0.08); + background: rgba(10, 12, 18, 0.96); + box-shadow: 0 26px 60px rgba(7, 7, 10, 0.75); + display: grid; + gap: $space-base * 1.1; +} + +.command-palette__header h2 { + margin: 0; + font-size: 1.2rem; + letter-spacing: 0.06em; + text-transform: uppercase; + color: $color-text-dim; +} + +.command-palette__hint { + font-size: 0.82rem; + color: rgba(244, 244, 246, 0.56); + margin: 0.2rem 0 0; +} + +.command-palette__search { + display: grid; + gap: $space-base * 0.6; +} + +.command-palette__search input { + width: 100%; + padding: $space-base * 0.9 $space-base; + border-radius: 18px; + border: 1px solid rgba(255, 255, 255, 0.12); + background: rgba(4, 5, 10, 0.8); + color: $color-text; + font-size: 1.05rem; + outline: none; + transition: border-color $transition; + + &:focus { + border-color: rgba(143, 107, 255, 0.5); + box-shadow: 0 0 0 3px rgba(143, 107, 255, 0.2); + } +} + +.command-palette__status { + font-size: 0.85rem; + color: $color-text-dim; +} + +.command-palette__groups { + display: grid; + gap: $space-base * 0.75; + max-height: 360px; + overflow-y: auto; +} + +.command-group { + border-radius: 18px; + border: 1px solid rgba(255, 255, 255, 0.08); + background: rgba(18, 20, 25, 0.9); + overflow: hidden; +} + +.command-group header { + display: flex; + align-items: center; + gap: $space-base * 0.6; + padding: $space-base * 0.75 $space-base; + font-size: 0.9rem; + color: $color-text-dim; +} + +.command-group__icon { + display: inline-flex; + width: 28px; + height: 28px; + border-radius: 10px; + align-items: center; + justify-content: center; + background: rgba(143, 107, 255, 0.18); +} + +.command-group__list { + list-style: none; + margin: 0; + padding: 0; +} + +.command-group__item { + border-top: 1px solid rgba(255, 255, 255, 0.06); +} + +.command-group__link { + display: flex; + align-items: center; + justify-content: space-between; + gap: $space-base * 0.75; + padding: $space-base * 0.75 $space-base; + transition: background $transition, transform $transition; + + &:hover, + &.is-active { + background: rgba(143, 107, 255, 0.18); + transform: translateX(2px); + } +} + +.command-group__meta { + display: grid; + gap: $space-base * 0.35; +} + +.command-group__title { + font-weight: 600; +} + +.command-group__desc { + font-size: 0.85rem; + color: $color-text-dim; +} + +.command-group__key { + font-size: 0.82rem; + color: rgba(244, 244, 246, 0.52); +} + +.command-palette__footer { + display: flex; + justify-content: space-between; + align-items: center; + gap: $space-base; + font-size: 0.78rem; + color: rgba(244, 244, 246, 0.48); +} + +.command-palette__footer span { + display: inline-flex; + gap: $space-base * 0.4; + align-items: center; +} + +.command-palette__footer kbd { + background: rgba(255, 255, 255, 0.12); + border-radius: 6px; + padding: 2px 6px; +} + +.page-footer a, +.command-group__link { + text-decoration: none; +} + +@keyframes pulse { + 0% { + box-shadow: 0 0 0 0 rgba(107, 255, 167, 0.65); + } + + 70% { + box-shadow: 0 0 0 16px rgba(107, 255, 167, 0); + } + + 100% { + box-shadow: 0 0 0 0 rgba(107, 255, 167, 0); + } +} + +@media (max-width: 1200px) { + .viewport-frame { + grid-template-columns: 300px 1fr; + } + + .reading-lane { + grid-template-columns: 1fr; + } + + .reading-lane__rail { + position: static; + grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); + grid-auto-flow: column; + overflow-x: auto; + } +} + +@media (max-width: 980px) { + .viewport-frame { + grid-template-columns: 1fr; + } + + .nav-compass { + position: fixed; + z-index: 14; + transform: translateX(-100%); + transition: transform 260ms ease; + width: min(340px, 90vw); + } + + .nav-compass[data-open="true"] { + transform: translateX(0); + } + + .nav-toggle__label { + display: none; + } + + .content-orbit { + padding-top: $space-base * 6; + } + + .command-launch { + right: 50%; + transform: translateX(50%); + } +} + +@media (max-width: 680px) { + .page-masthead { + grid-template-columns: 1fr; + } + + .masthead__actions { + flex-wrap: wrap; + } +} + +:root.light { + --bg: #f7f7fb; + --text: #141726; + --text-dim: rgba(20, 23, 38, 0.72); + --surface: rgba(255, 255, 255, 0.85); + --surface-alt: rgba(255, 255, 255, 0.96); + --border: rgba(20, 23, 38, 0.12); +} + +.theme-light { + background: var(--bg); + color: var(--text); +} + +.theme-light .nav-compass, +.theme-light .doc-canvas, +.theme-light .page-masthead, +.theme-light .rail-card { + background: var(--surface); + border-color: var(--border); + color: var(--text); +} + +.theme-light .doc-canvas p, +.theme-light .doc-canvas li { + color: var(--text-dim); +} + +.theme-light .masthead__eyebrow { + color: var(--text-dim); +} + +.theme-light .nav-stack { + background: var(--surface-alt); +} + +.theme-light .rail-card--toc a { + color: var(--text-dim); +} + +.theme-light .command-palette__panel { + background: rgba(255, 255, 255, 0.98); + color: var(--text); +} + diff --git a/docs/assets/css/compass.scss b/docs/assets/css/compass.scss new file mode 100644 index 0000000..1ca8997 --- /dev/null +++ b/docs/assets/css/compass.scss @@ -0,0 +1,4 @@ +--- +--- +@import "compass"; + diff --git a/docs/assets/js/compass.js b/docs/assets/js/compass.js new file mode 100644 index 0000000..434a2fd --- /dev/null +++ b/docs/assets/js/compass.js @@ -0,0 +1,337 @@ +(() => { + const body = document.body; + const nav = document.querySelector('.nav-compass'); + const navLinks = [...document.querySelectorAll('[data-nav-link]')]; + const navDescription = document.getElementById('nav-description'); + const navToggle = document.querySelector('[data-nav-toggle]'); + const commandPalette = document.getElementById('command-palette'); + const commandOpeners = [...document.querySelectorAll('[data-palette-open]')]; + const commandCloser = commandPalette?.querySelector('[data-palette-close]'); + const commandInput = document.getElementById('command-palette-input'); + const commandResults = document.getElementById('command-palette-results'); + const commandStatus = document.getElementById('command-palette-status'); + const themeToggle = document.querySelector('[data-theme-toggle]'); + const readingRail = document.querySelector('[data-rail]'); + const docCanvas = document.querySelector('[data-doc-canvas]'); + + const navData = window.__RC_NAV__ || []; + const shortcuts = new Map(); + + const setNavDescription = (text) => { + if (navDescription) { + navDescription.textContent = text; + } + }; + + navLinks.forEach((link) => { + const description = link.dataset.description; + link.addEventListener('mouseenter', () => setNavDescription(description || '')); + link.addEventListener('focus', () => setNavDescription(description || '')); + link.addEventListener('mouseleave', () => setNavDescription('Hover a page in the Compass to read its story.')); + link.addEventListener('blur', () => setNavDescription('Hover a page in the Compass to read its story.')); + }); + + const mobileNav = { + open: () => { + if (nav) { + nav.dataset.open = 'true'; + } + }, + close: () => { + if (nav) { + nav.dataset.open = 'false'; + } + }, + toggle: () => { + if (nav) { + const open = nav.dataset.open === 'true'; + nav.dataset.open = open ? 'false' : 'true'; + } + } + }; + + if (navToggle) { + navToggle.addEventListener('click', () => mobileNav.toggle()); + } + + const palette = { + visible: false, + open() { + if (!commandPalette) return; + commandPalette.setAttribute('aria-hidden', 'false'); + document.body.style.overflow = 'hidden'; + commandInput?.focus(); + this.visible = true; + renderCommandPalette(''); + }, + close() { + if (!commandPalette) return; + commandPalette.setAttribute('aria-hidden', 'true'); + document.body.style.overflow = ''; + commandInput?.blur(); + this.visible = false; + }, + toggle() { + if (this.visible) { + this.close(); + } else { + this.open(); + } + } + }; + + commandOpeners.forEach((button) => button.addEventListener('click', () => palette.open())); + commandCloser?.addEventListener('click', () => palette.close()); + + document.addEventListener('keydown', (event) => { + if ((event.ctrlKey || event.metaKey) && event.key.toLowerCase() === 'k') { + event.preventDefault(); + palette.toggle(); + } + + if (event.key === 'Escape' && palette.visible) { + palette.close(); + } + }); + + let activeCommandIndex = 0; + + const getVisibleCommands = () => { + return [...commandResults.querySelectorAll('.command-group__link')]; + }; + + const moveCommandFocus = (direction) => { + const commands = getVisibleCommands(); + if (!commands.length) return; + + activeCommandIndex = (activeCommandIndex + direction + commands.length) % commands.length; + commands.forEach((cmd, index) => cmd.classList.toggle('is-active', index === activeCommandIndex)); + commands[activeCommandIndex].scrollIntoView({ block: 'nearest' }); + }; + + const activateCommand = () => { + const commands = getVisibleCommands(); + commands[activeCommandIndex]?.click(); + }; + + const renderCommandPalette = (query) => { + if (!commandResults) return; + + const normalized = query.trim().toLowerCase(); + + const groups = navData.map((group) => { + const filteredLinks = group.links.filter((link) => { + if (!normalized) return true; + return ( + link.label.toLowerCase().includes(normalized) || + (link.description && link.description.toLowerCase().includes(normalized)) + ); + }); + + return { ...group, links: filteredLinks }; + }).filter((group) => group.links.length > 0); + + commandResults.innerHTML = ''; + + if (groups.length === 0) { + commandResults.innerHTML = 'No matches yet. Try a narrower phrase.'; + commandStatus.textContent = 'No matches'; + return; + } + + const fragment = document.createDocumentFragment(); + + groups.forEach((group, groupIndex) => { + const section = document.createElement('section'); + section.className = 'command-group'; + section.setAttribute('role', 'group'); + section.setAttribute('aria-labelledby', `command-group-${groupIndex}`); + + const header = document.createElement('header'); + header.innerHTML = ` + ${group.icon || ''} + ${group.title} + `; + section.appendChild(header); + + const list = document.createElement('ul'); + list.className = 'command-group__list'; + + group.links.forEach((link, linkIndex) => { + const item = document.createElement('li'); + item.className = 'command-group__item'; + + const anchor = document.createElement('a'); + anchor.className = 'command-group__link'; + anchor.href = link.path; + anchor.setAttribute('role', 'option'); + anchor.dataset.commandIndex = `${groupIndex}-${linkIndex}`; + anchor.innerHTML = ` + + ${link.label} + ${link.description || ''} + + ↵ + `; + + anchor.addEventListener('click', (event) => { + event.preventDefault(); + window.location.href = anchor.href; + palette.close(); + }); + + item.appendChild(anchor); + list.appendChild(item); + }); + + section.appendChild(list); + fragment.appendChild(section); + }); + + commandResults.appendChild(fragment); + const total = groups.reduce((sum, group) => sum + group.links.length, 0); + commandStatus.textContent = `${total} result${total === 1 ? '' : 's'}`; + activeCommandIndex = 0; + getVisibleCommands()[0]?.classList.add('is-active'); + }; + + commandInput?.addEventListener('input', (event) => { + renderCommandPalette(event.target.value); + }); + + commandInput?.addEventListener('keydown', (event) => { + if (event.key === 'ArrowDown') { + event.preventDefault(); + moveCommandFocus(1); + } else if (event.key === 'ArrowUp') { + event.preventDefault(); + moveCommandFocus(-1); + } else if (event.key === 'Enter') { + event.preventDefault(); + activateCommand(); + } + }); + + const initializeShortcuts = () => { + shortcuts.set('slash', (event) => { + if (event.target === commandInput) { + event.stopPropagation(); + } + }); + + document.addEventListener('keydown', (event) => { + if (event.key === 't' && !['INPUT', 'TEXTAREA'].includes(event.target.tagName)) { + event.preventDefault(); + toggleTheme(); + } + }); + }; + + const toggleTheme = () => { + const current = localStorage.getItem('rc-theme') || 'auto'; + const next = current === 'light' ? 'dark' : current === 'dark' ? 'auto' : 'light'; + applyTheme(next); + }; + + const applyTheme = (mode) => { + localStorage.setItem('rc-theme', mode); + body.classList.remove('theme-light', 'theme-dark'); + + if (mode === 'auto') { + body.classList.add(window.matchMedia('(prefers-color-scheme: dark)').matches ? 'theme-dark' : 'theme-light'); + } else { + body.classList.add(mode === 'dark' ? 'theme-dark' : 'theme-light'); + } + }; + + themeToggle?.addEventListener('click', () => toggleTheme()); + applyTheme(localStorage.getItem('rc-theme') || 'auto'); + + const buildToc = () => { + if (!readingRail || !docCanvas) return; + + const headings = docCanvas.querySelectorAll('h2, h3'); + if (!headings.length) return; + + const toc = document.createElement('nav'); + const list = document.createElement('ul'); + toc.appendChild(list); + + headings.forEach((heading) => { + if (!heading.id) { + heading.id = heading.textContent.toLowerCase().replace(/[^a-z0-9]+/g, '-'); + } + + const item = document.createElement('li'); + const link = document.createElement('a'); + link.href = `#${heading.id}`; + link.textContent = heading.textContent; + link.dataset.tocLink = heading.id; + + link.addEventListener('click', (event) => { + event.preventDefault(); + document.getElementById(heading.id)?.scrollIntoView({ behavior: 'smooth', block: 'start' }); + palette.close(); + }); + + item.appendChild(link); + list.appendChild(item); + }); + + const railToc = document.getElementById('rail-toc'); + if (railToc) { + railToc.innerHTML = ''; + railToc.appendChild(toc); + } + + const observer = new IntersectionObserver((entries) => { + entries.forEach((entry) => { + const link = document.querySelector(`[data-toc-link="${entry.target.id}"]`); + if (!link) return; + if (entry.isIntersecting) { + document.querySelectorAll('[data-toc-link]').forEach((node) => node.classList.remove('is-active')); + link.classList.add('is-active'); + } + }); + }, { + rootMargin: '-40% 0px -45% 0px', + threshold: 0.1 + }); + + headings.forEach((heading) => observer.observe(heading)); + }; + + const hydrateActiveNav = () => { + const page = document.body.dataset.page; + if (!page) return; + navLinks.forEach((link) => { + const href = link.getAttribute('href'); + if (!href) return; + const normalizedHref = href.replace(window.location.origin, '').replace(/index\.html$/, '/'); + const normalizedPage = page.replace(/index\.html$/, '/'); + if (normalizedHref === normalizedPage) { + link.classList.add('is-active'); + link.closest('[data-stack]')?.setAttribute('data-active', 'true'); + } + }); + }; + + const installNavData = () => { + if (!window.__RC_NAV__) { + window.__RC_NAV__ = navData; + } + }; + + installNavData(); + initializeShortcuts(); + renderCommandPalette(''); + buildToc(); + hydrateActiveNav(); + + window.addEventListener('resize', () => { + if (window.innerWidth > 980) { + nav?.removeAttribute('data-open'); + } + }); +})(); + diff --git a/docs/guides/AI-Provider-Comparison.md b/docs/guides/AI-Provider-Comparison.md new file mode 100644 index 0000000..70f2c45 --- /dev/null +++ b/docs/guides/AI-Provider-Comparison.md @@ -0,0 +1,171 @@ +# AI Provider Comparison + +## Overview + +FlowVision now supports **three AI provider options**. Choose the one that fits your needs! + +## 📊 Quick Comparison + +| Feature | Azure OpenAI | LM Studio (Local) | Azure Foundry | +|---------|--------------|-------------------|---------------| +| **Cost** | 💰 Pay per use | ✅ **FREE** | 💰 Pay per use | +| **Privacy** | ⚠️ Cloud-based | ✅ **100% Local** | ⚠️ Cloud-based | +| **Internet Required** | ✅ Yes | ✅ **No** (after setup) | ✅ Yes | +| **Setup Difficulty** | 🟢 Easy | 🟡 Moderate | 🟢 Easy | +| **Response Speed** | 🟢 Fast | 🟡 Depends on hardware | 🟢 Fast | +| **Response Quality** | 🟢 Excellent | 🟡 Good to Excellent | 🟢 Excellent | +| **Tool Calling** | ✅ Full support | ✅ **Supported** | ✅ Full support | +| **API Limits** | ⚠️ Rate limits apply | ✅ **No limits** | ⚠️ Rate limits apply | +| **Model Choice** | Limited to Azure | ✅ **Any LM Studio model** | Limited to provider | +| **Offline Mode** | ❌ No | ✅ **YES** | ❌ No | + +## 🎯 Use Case Recommendations + +### Choose **LM Studio** if you need: +- ✅ **Complete privacy** and data security +- ✅ **Zero costs** regardless of usage +- ✅ **Offline operation** (no internet needed) +- ✅ **Sensitive data** handling +- ✅ **Unlimited usage** without rate limits + +**Best for**: Privacy-conscious users, offline work, testing, learning + +### Choose **Azure OpenAI** if you need: +- ✅ **Best performance** without local hardware +- ✅ **Enterprise support** and SLA +- ✅ **Consistent quality** across requests +- ✅ **No setup hassle** - works immediately +- ✅ **Latest models** (GPT-4, etc.) + +**Best for**: Production use, enterprise apps, guaranteed uptime + +### Choose **Azure Foundry** if you need: +- ✅ **Free tier** for testing +- ✅ **Variety of models** to choose from +- ✅ **GitHub integration** +- ✅ **Easy setup** with GitHub account + +**Best for**: Developers, open-source projects, testing different models + +## 💻 System Requirements + +### Azure OpenAI +- **RAM**: N/A (cloud service) +- **Storage**: None +- **Internet**: Required +- **Hardware**: Any PC + +### LM Studio +- **RAM**: 16GB minimum, 32GB+ recommended +- **Storage**: 2GB - 50GB for models +- **GPU**: Optional but recommended for speed +- **Internet**: Required for initial download only + +### Azure Foundry +- **RAM**: N/A (cloud service) +- **Storage**: None +- **Internet**: Required +- **Hardware**: Any PC + +## 💰 Cost Comparison + +### Scenario: 1,000 requests/day (30,000/month) + +| Provider | Cost Estimate | Notes | +|----------|--------------|-------| +| **LM Studio** | **$0/month** 🎉 | Free! Only electricity cost | +| **Azure OpenAI** | $30-150/month | Depends on model and usage | +| **GitHub Models** | Free tier → Paid | Limited free tier | + +## ⚡ Performance Comparison + +### Response Time (Average) + +| Provider | Simple Query | Tool Calling | Long Response | +|----------|--------------|--------------|---------------| +| **Azure OpenAI** | 1-2s | 2-4s | 5-10s | +| **LM Studio (16GB)** | 2-5s | 5-10s | 10-30s | +| **LM Studio (32GB+GPU)** | 1-3s | 3-6s | 5-15s | +| **Azure Foundry** | 1-3s | 3-5s | 5-12s | + +## 🔐 Privacy & Security + +### Data Flow + +**Azure OpenAI:** +``` +Your Data → Azure Cloud → Processed → Returned +``` + +**LM Studio:** +``` +Your Data → Your Computer → Processed → Stays Local +``` + +**Azure Foundry:** +``` +Your Data → Azure/GitHub Cloud → Processed → Returned +``` + +## 🎮 Switching Between Providers + +FlowVision automatically switches based on your configuration: + +``` +Priority Order: +1. LM Studio (if enabled) +2. Multi-Agent Mode (if enabled) +3. Azure OpenAI (default) +``` + +**No restart required!** Changes take effect immediately. + +## 📈 Recommendations by Role + +### For **Developers**: +**Primary**: LM Studio (free, unlimited testing) +**Backup**: Azure OpenAI (production deployments) + +### For **Enterprises**: +**Primary**: Azure OpenAI (SLA, support, scale) +**Secondary**: LM Studio (internal tools, testing) + +### For **Privacy-Conscious Users**: +**Only**: LM Studio (complete data control) + +### For **Students/Learners**: +**Primary**: LM Studio (free, learn AI concepts) +**Alternative**: Azure Foundry (free tier) + +## 💡 Pro Tip: Hybrid Approach + +**Best of Both Worlds:** + +1. **Development/Testing**: Use LM Studio + - Unlimited testing + - No costs + - Fast iteration + +2. **Production**: Use Azure OpenAI + - Consistent performance + - Enterprise support + - Latest models + +3. **Sensitive Data**: Always use LM Studio + - Complete privacy + - Local processing + - Your data stays yours + +## 🚀 Getting Started + +### LM Studio Setup: **5 minutes** +See: [LM Studio Quickstart](LM-Studio-Quickstart.md) + +### Azure OpenAI Setup: **10 minutes** +Already configured in FlowVision + +### Switch anytime with one checkbox! ✅ + +--- + +**Choose what works best for YOUR needs!** 🎯 \ No newline at end of file diff --git a/docs/guides/LM-Studio-Quickstart.md b/docs/guides/LM-Studio-Quickstart.md new file mode 100644 index 0000000..1e22908 --- /dev/null +++ b/docs/guides/LM-Studio-Quickstart.md @@ -0,0 +1,189 @@ +# LM Studio Quick Start - 5 Minutes to Local AI! 🚀 + +## What You Get +- ✅ **Run AI locally** on your machine +- ✅ **Complete privacy** - no data sent to cloud +- ✅ **No API costs** - completely free +- ✅ **Works offline** after setup +- ✅ **Same features** as Azure OpenAI + +## Step 1: Download LM Studio (2 min) + +1. Go to **https://lmstudio.ai/** +2. Click **Download** for your OS (Windows/Mac/Linux) +3. Install and launch LM Studio + +## Step 2: Get a Model (3 min) + +### Recommended Starting Model: **Hermes-2-Pro-Mistral-7B** + +In LM Studio: +1. Click the **🔍 Search** icon (top left) +2. Search for: **`hermes-2-pro`** +3. Find: **`NousResearch/Hermes-2-Pro-Mistral-7B-GGUF`** +4. Click **Download** next to **`Q4_K_M`** version (~4GB) +5. Wait for download to complete + +### Alternative Quick Models: +- **Fast & Lightweight**: `Phi-2-GGUF` (~1.6GB) - Great for testing +- **Best Quality**: `Mixtral-8x7B-Instruct-GGUF` (~26GB) - Needs 32GB+ RAM + +## Step 3: Start LM Studio Server (30 sec) + +1. In LM Studio, click **💻 Local Server** tab (left sidebar) +2. Select your downloaded model from the dropdown +3. Click **Start Server** button +4. Wait for "Server started on http://localhost:1234" +5. **Keep LM Studio running!** + +## Step 4: Configure FlowVision (1 min) + +### Option A: Using the UI (Easiest) +1. Open FlowVision +2. Go to **Settings** → **LM Studio Configuration** +3. Check ☑ **"Enable LM Studio"** +4. Click **"Test Connection"** (should show ✓ success) +5. Click **"Save"** + +### Option B: Manual Configuration +Create file: `%APPDATA%\FlowVision\lmstudioconfig.json` +```json +{ + "EndpointURL": "http://localhost:1234/v1", + "ModelName": "local-model", + "APIKey": "lm-studio", + "Enabled": true, + "Temperature": 0.7, + "MaxTokens": 2048, + "TimeoutSeconds": 300 +} +``` + +## Step 5: Test It! (30 sec) + +1. In FlowVision, type: **"What is 2+2?"** +2. You should see: **"Local AI response"** indicator +3. Get a response from your local model! + +## ✅ Success Indicators + +You'll know it's working when you see: +- ✅ "LM Studio Action Execution" in the task indicator +- ✅ "Processing your request with local AI" message +- ✅ "Local AI response" during generation +- ✅ Responses coming without internet connection + +## 🎯 Try These Commands + +### Simple Test +``` +"Tell me a joke" +``` + +### Tool Calling Test +``` +"Open Notepad" +"What windows are open?" +"Create a file called test.txt with the content 'Hello World'" +``` + +### Code Generation +``` +"Write a Python function to calculate fibonacci numbers" +``` + +## 🐛 Troubleshooting + +### "Cannot connect to LM Studio" +**Fix**: +1. Make sure LM Studio is running +2. Check that server is started (green "Stop Server" button visible) +3. Verify endpoint: `http://localhost:1234/v1` + +### Model is too slow +**Fix**: +1. Try a smaller model (Phi-2) +2. Enable GPU acceleration in LM Studio settings +3. Close other applications +4. Reduce Max Tokens to 1024 + +### "Connection timeout" +**Fix**: +1. First request is slower (model loading) +2. Increase timeout in config (300 → 600 seconds) +3. Wait a bit longer +4. Check CPU/RAM usage + +### Test Connection shows error +**Fix**: +1. Restart LM Studio server +2. Reload model in LM Studio +3. Check firewall isn't blocking port 1234 +4. Try changing port in LM Studio server settings + +## 📊 Performance Tips + +### For Faster Responses: +- **Temperature**: 0.2 (more focused) +- **Max Tokens**: 1024 (shorter responses) +- **Model**: Use Phi-2 or TinyLlama + +### For Better Quality: +- **Temperature**: 0.7-0.8 (more creative) +- **Max Tokens**: 2048-4096 (longer responses) +- **Model**: Use Mixtral or Llama-2-13B + +### For Tool Calling: +- **Best Model**: Hermes-2-Pro-Mistral-7B +- **Temperature**: 0.3-0.5 +- **Max Tokens**: 1024 + +## 🔄 Switching Between Local and Azure + +### Enable Local AI: +Check ☑ "Enable LM Studio" in settings + +### Disable (Use Azure): +Uncheck ☐ "Enable LM Studio" in settings + +**No restart needed!** FlowVision automatically switches. + +## 💡 Pro Tips + +1. **Keep LM Studio running** - Don't close it while using FlowVision +2. **First request is slow** - Model loading takes time, be patient +3. **Monitor resources** - Check Task Manager for CPU/RAM usage +4. **Try different models** - Each has different strengths +5. **Use GPU** - Enable in LM Studio settings for huge speed boost + +## 📱 Quick Reference + +| Setting | Recommended | Fast | Quality | +|---------|------------|------|---------| +| Model | Hermes-2-Pro-7B | Phi-2 | Mixtral-8x7B | +| Temperature | 0.5 | 0.2 | 0.7 | +| Max Tokens | 2048 | 1024 | 4096 | +| RAM Needed | 16GB | 8GB | 32GB+ | + +## 🎉 That's It! + +You're now running AI locally! + +**Next Steps:** +- Try different models +- Experiment with settings +- Test tool calling features +- Read full documentation: [LM Studio Integration Guide](LM-Studio-Integration.md) + +## 🆘 Need Help? + +1. Check full guide: [LM Studio Integration Guide](LM-Studio-Integration.md) +2. LM Studio docs: https://lmstudio.ai/docs +3. LM Studio Discord: https://discord.gg/lmstudio +4. GitHub Issues: (your repo) + +--- + +**Enjoy your private, local AI!** 🎊 + +No more API costs! No more cloud dependency! Complete control! 🚀 \ No newline at end of file diff --git a/docs/guides/OmniParser-Setup.md b/docs/guides/OmniParser-Setup.md new file mode 100644 index 0000000..6830493 --- /dev/null +++ b/docs/guides/OmniParser-Setup.md @@ -0,0 +1,443 @@ +# OmniParser Quick Start Guide 🚀 + +## TL;DR - Just Use It! ✨ + +**OmniParser is now fully automatic!** No server management needed. + +``` +You: "Capture and parse the screen" +FlowVision: [Auto-starts server if needed] +FlowVision: [Returns parsed UI elements] +``` + +**That's it!** 🎊 + +--- + +## What Changed + +### Before ❌ +```bash +# Terminal 1 +cd T:\OmniParser\omnitool\omniparserserver +python -m omniparserserver --som_model_path ... +[Keep this terminal open forever] + +# Terminal 2 - FlowVision +[Configure server URL] +[Use FlowVision] +``` + +### After ✅ +```bash +# Just use FlowVision - it handles everything! +[Launch FlowVision] +[Use screen capture] +[Done!] +``` + +--- + +## First Time Setup + +### One-Time: Install OmniParser + +**1. Clone OmniParser (if not already installed):** +```bash +cd T:\ +git clone https://github.com/microsoft/OmniParser.git +``` + +**2. Install Python Dependencies:** +```bash +cd T:\OmniParser +pip install -r requirements.txt +``` + +**3. Download Model Weights:** +```bash +huggingface-cli download microsoft/OmniParser-v2.0 --local-dir weights +``` + +**Done!** ✓ + +### Verify Installation + +Check that these exist: +- `T:\OmniParser\omnitool\omniparserserver\omniparserserver.py` ✓ +- `T:\OmniParser\weights\icon_detect\model.pt` ✓ +- `T:\OmniParser\weights\icon_caption_florence\` ✓ + +--- + +## Usage + +### Basic Usage (Most Common) + +**Just use screen capture - FlowVision does the rest!** + +``` +You: "Capture the current screen and tell me what you see" +``` + +**First Time:** +- Takes ~15-20 seconds (server startup + model loading) +- Shows status: "Starting local OmniParser server..." + +**After First Time:** +- Takes ~2-3 seconds (server already running) +- Instant processing! + +### What Happens Automatically + +``` +1. You request screen capture + ↓ +2. FlowVision checks: Server running? + ↓ No +3. FlowVision starts Python server + ↓ +4. Server loads models (~15 seconds) + ↓ +5. FlowVision verifies server ready + ↓ +6. Processes your screenshot + ↓ +7. Returns parsed UI elements + ↓ +8. Server stays running for next time! +``` + +--- + +## Performance + +### Timing + +**First Request (Cold Start):** +``` +Server startup: ~15 seconds +Processing: ~3 seconds +────────────────────────────── +Total: ~18 seconds +``` + +**Subsequent Requests:** +``` +Processing: ~3 seconds ✓ +``` + +**With GPU (if configured):** +``` +Processing: ~1 second ✓✓ +``` + +### Making It Faster + +**Option 1: Keep FlowVision Running** +- Server stays active +- All captures are fast (~3 seconds) + +**Option 2: Use GPU** +1. Edit `LocalOmniParserManager.cs` +2. Change `--device cpu` to `--device cuda` +3. Rebuild FlowVision +4. Result: ~1 second per screenshot! + +--- + +## Troubleshooting + +### "OmniParser server not available" + +**Check installation:** +```bash +# Verify these exist: +dir T:\OmniParser\omnitool\omniparserserver\omniparserserver.py +dir T:\OmniParser\weights\icon_detect\model.pt +dir T:\OmniParser\weights\icon_caption_florence +``` + +**Check Python:** +```bash +python --version # Should be 3.12+ +pip list | findstr torch # Should show PyTorch +``` + +### "Server starting but timing out" + +**First time takes longer** - model loading can take 20-30 seconds. +- Wait patiently +- Check FlowVision logs for "OmniParser-Server" messages +- Verify models downloaded correctly + +### "Port 8080 already in use" + +**Option 1: Stop other service** +```bash +netstat -ano | findstr :8080 +taskkill /PID /F +``` + +**Option 2: Use different port** +```csharp +// In your code +LocalOmniParserManager.Configure(port: 8081); +``` + +### Get Detailed Diagnostics + +```csharp +string info = LocalOmniParserManager.GetDiagnostics(); +Console.WriteLine(info); +``` + +Shows: +- Installation path +- Server script status +- Weights folder status +- Server running status +- Process status + +--- + +## Advanced Configuration + +### Custom OmniParser Location + +**If not at T:\OmniParser:** +```csharp +LocalOmniParserManager.Configure( + omniParserPath: @"C:\MyPath\OmniParser" +); +``` + +### Custom Python Executable + +**If using specific Python:** +```csharp +LocalOmniParserManager.Configure( + pythonExe: @"C:\Python312\python.exe" +); +``` + +### Using Conda Environment + +**If using conda:** +```csharp +LocalOmniParserManager.Configure( + pythonExe: @"C:\Users\YourName\miniconda3\envs\omni\python.exe" +); +``` + +### Custom Port + +**If port conflict:** +```csharp +LocalOmniParserManager.Configure( + port: 8081 +); +``` + +--- + +## API Reference + +### Check Server Status + +```csharp +bool isRunning = await LocalOmniParserManager.IsServerRunningAsync(); +if (isRunning) { + Console.WriteLine("Server is ready!"); +} +``` + +### Ensure Server Running + +```csharp +bool started = await LocalOmniParserManager.EnsureServerRunningAsync(); +if (started) { + // Server is now running, proceed with capture +} +``` + +### Stop Server + +```csharp +LocalOmniParserManager.StopServer(); +// Server stopped (will auto-restart on next capture) +``` + +### Get Diagnostics + +```csharp +string diagnostics = LocalOmniParserManager.GetDiagnostics(); +Console.WriteLine(diagnostics); +``` + +Output: +``` +OmniParser Diagnostics: + Installation Path: T:\OmniParser + Server Script: ✓ Found + Weights Folder: ✓ Found + Python Executable: python + Server URL: http://127.0.0.1:8080 + Server Running: ✓ Yes + Process Active: ✓ Yes +``` + +--- + +## Tips & Tricks + +### 1. First Use is Slow +- Normal! Server startup + model loading +- Takes ~15-20 seconds first time +- Subsequent captures are fast (~2-3 seconds) + +### 2. Keep Server Running +- Server stays active between captures +- No need to restart +- Much faster for multiple captures + +### 3. Check Logs +- FlowVision logs show server output +- Look for "OmniParser-Server" entries +- Helps debug issues + +### 4. GPU Acceleration +- Edit LocalOmniParserManager.cs +- Change `--device cpu` to `--device cuda` +- Requires CUDA-capable GPU +- ~1 second per screenshot! + +### 5. Batch Processing +- Server handles multiple requests +- Process many screenshots efficiently +- No per-request overhead + +--- + +## Common Workflows + +### Single Screenshot + +``` +You: "Capture and analyze the screen" +FlowVision: [Ensures server running] +FlowVision: [Captures and processes] +FlowVision: "I see: [UI elements]" +``` + +### Multiple Screenshots + +``` +You: "Capture the screen" +FlowVision: [First: ~15s, starts server] + +You: "Capture again" +FlowVision: [Fast: ~3s, server running] + +You: "And again" +FlowVision: [Fast: ~3s, server running] +``` + +### Different Applications + +``` +You: "Capture Chrome window" +FlowVision: [Processes Chrome UI] + +You: "Now capture VS Code" +FlowVision: [Processes VS Code UI] + +You: "Back to Chrome" +FlowVision: [Processes Chrome UI] +``` + +All fast after first one! + +--- + +## Status Messages + +### You'll See: + +**Starting:** +``` +"Checking local OmniParser server..." +"Starting local OmniParser server..." +"Server started successfully in 15 seconds" +``` + +**Running:** +``` +"Server already running" +"Processing screenshot..." +"Done!" +``` + +**Errors:** +``` +"Failed to start server: [reason]" +"OmniParser not found at T:\OmniParser" +"Check installation and try again" +``` + +--- + +## FAQ + +### Q: Do I need to start the server manually? +**A:** No! FlowVision starts it automatically. + +### Q: Will it restart if it crashes? +**A:** Yes! Next capture will restart it automatically. + +### Q: Can I use it offline? +**A:** Yes! Everything runs locally. + +### Q: Does it use GPU? +**A:** CPU by default. Edit code for GPU. + +### Q: Can I change the port? +**A:** Yes! Use `LocalOmniParserManager.Configure(port: 8081)` + +### Q: What if OmniParser is elsewhere? +**A:** Configure path: `LocalOmniParserManager.Configure(omniParserPath: "C:\MyPath")` + +### Q: How do I know if it's working? +**A:** Check diagnostics: `LocalOmniParserManager.GetDiagnostics()` + +--- + +## Summary + +### What You Get ✨ + +✅ **Automatic** - Server starts when needed +✅ **Fast** - ~3 seconds after first request +✅ **Reliable** - Auto-restart on failure +✅ **Local** - No cloud, works offline +✅ **Private** - All processing on your machine +✅ **Simple** - No manual management + +### What You Need 📋 + +✅ OmniParser at `T:\OmniParser` +✅ Python 3.12+ with dependencies +✅ Model weights downloaded +✅ Port 8080 available (or configure different) + +### How to Use 🚀 + +1. Use FlowVision screen capture +2. That's it! ✓ + +**It just works!** 🎊 + +--- + +**Status:** ✅ Ready to use +**Configuration:** ❌ None needed (optional customization available) \ No newline at end of file diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..4f523ef --- /dev/null +++ b/docs/index.md @@ -0,0 +1,91 @@ +--- +layout: default +title: Home +--- + +# Mission Control Briefing + +Welcome aboard **Recursive Control Mission Control**. This is your hangar, briefing room, and launch pad for orchestrating autonomous Windows workflows with your preferred AI pilots. + +--- + +## Launch Console + +**Pick Your Objective** + +- [Installation Capsule](Installation.html) — deploy the suite locally in minutes +- [First Flight Checklist](Getting-Started.html) — complete your first automation loop +- [UI Features Atlas](UI-Features.html) — map every control surface and telemetry feed + +**Signal Boosts** + +- Discord flight crew → [Join the ops channel](https://discord.gg/mQWsWeHsVU) +- Latest release notes → [Version pulse](https://github.com/flowdevs-io/Recursive-Control/releases) + +--- + +## What You’re Flying + +Recursive Control stitches together a **triad of agents**, a **modular plugin bay**, and **vision-assisted perception** so your instructions arrive as working Windows tasks. + +| Layer | Callsign | Mission | +| --- | --- | --- | +| 🧭 Coordinator | Hermes | Interprets intent, selects strategy | +| 🛠️ Planner | Daedalus | Authors the execution playbook | +| ⚡ Executor | Talos | Operates Windows actions through plugins | + +Plugins extend the aircraft: keyboard and mouse automation, screen intelligence, PowerShell/CMD access, Playwright browser control, remote HTTP bridge, and more. + +--- + +## Flight Patterns + +```text +“Launch Excel, log in to Teams, and stage the daily report template.” +“Sweep Downloads, convert every .png into annotated documentation captures.” +“Open Chrome, authenticate into Jira, and create status tickets for each blocker.” +``` + +Every request is decomposed into atomic moves, verified, and streamed back with commentary. + +--- + +## Build Lab + +```bash +git clone https://github.com/flowdevs-io/Recursive-Control.git +cd Recursive-Control +dotnet restore && dotnet build +``` + +Fork the repository to inject new agents, craft plugins, or integrate your own telemetry. + +--- + +## Radar Timeline + +**Now shipping** + +- Multi-agent orchestration with live reasoning feed +- OmniParser bridge for Florence-powered spatial awareness +- Plugin toggles, profiles, and per-model prompt vibes + +**Locking targets** + +- Reconfigurable task board with shared state between agents +- Gemini, Bedrock, Ollama, Phi4 connectors +- Real-time speech recognition powered by Whisper successors + +--- + +## Support Deck + +- [FAQ Loop](FAQ.html) +- [Troubleshooting Console](Troubleshooting.html) +- [Repo Handbook](README.html) + +Open a [GitHub issue](https://github.com/flowdevs-io/Recursive-Control/issues) for feature requests or turbulence reports. + +--- + +**“Make computers feel like teammates.”** — FlowDevs Flight Core diff --git a/packages.config b/packages.config new file mode 100644 index 0000000..986d72f --- /dev/null +++ b/packages.config @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/weights/icon_detect/model.pt b/weights/icon_detect/model.pt new file mode 100644 index 0000000..f55a310 Binary files /dev/null and b/weights/icon_detect/model.pt differ
Type to search or jump with ↑ ↓ ↵ · Press Esc to close
{{ page.subtitle }}
No matches yet. Try a narrower phrase.