Skip to content

Commit f157b85

Browse files
Add Gemini and local plugin support; UI and config updates
Introduces Google Gemini provider support in AIProviderConfigForm and AIClientFactory, adds Clipboard and FileSystem plugins, and updates ToolConfig to enable these plugins. Refactors Actioner and MultiAgentActioner to use the new AIClientFactory for provider-agnostic chat client creation. Improves LM Studio config validation and UI, adds auto-detect for local AI endpoints, and updates OmniParserForm to reflect embedded ONNX model usage. Also includes minor fixes and enhancements to plugin APIs, OCR helper, and project file resource handling.
1 parent 8345dc6 commit f157b85

18 files changed

Lines changed: 1039 additions & 225 deletions

FlowVision/AIProviderConfigForm.cs

Lines changed: 294 additions & 48 deletions
Large diffs are not rendered by default.

FlowVision/FlowVision.csproj

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@
211211
</Compile>
212212
<Compile Include="lib\Classes\AgentCoordinator.cs" />
213213
<Compile Include="lib\Classes\ai\AgentRole.cs" />
214+
<Compile Include="lib\Classes\ai\AIClientFactory.cs" />
214215
<Compile Include="lib\Classes\ai\LMStudioActioner.cs" />
215216
<Compile Include="lib\Classes\ai\MultiAgentActioner.cs" />
216217
<Compile Include="lib\Classes\LMStudioConfig.cs" />
@@ -250,7 +251,9 @@
250251
<Compile Include="lib\Classes\UI\ActivityMonitor.cs">
251252
<SubType>UserControl</SubType>
252253
</Compile>
254+
<Compile Include="lib\Plugins\ClipboardPlugin.cs" />
253255
<Compile Include="lib\Plugins\CMDPlugin.cs" />
256+
<Compile Include="lib\Plugins\FileSystemPlugin.cs" />
254257
<Compile Include="lib\Plugins\KeyboardPlugin.cs" />
255258
<Compile Include="lib\Plugins\MousePlugin.cs" />
256259
<Compile Include="lib\Plugins\PowershellPlugin.cs" />
@@ -301,6 +304,7 @@
301304
</ItemGroup>
302305
<ItemGroup>
303306
<Content Include="recursive-control-icon.ico" />
307+
<EmbeddedResource Include="Models\icon_detect.onnx" />
304308
</ItemGroup>
305309
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
306310
<Import Project="..\packages\System.ValueTuple.4.6.1\build\net471\System.ValueTuple.targets" Condition="Exists('..\packages\System.ValueTuple.4.6.1\build\net471\System.ValueTuple.targets')" />
@@ -327,13 +331,6 @@
327331
</ItemGroup>
328332
<Copy SourceFiles="@(TesseractNative)" DestinationFolder="$(OutputPath)" SkipUnchangedFiles="true" />
329333
</Target>
330-
<!-- Copy OmniParser model to output directory -->
331-
<Target Name="CopyOmniParserModel" AfterTargets="AfterBuild">
332-
<ItemGroup>
333-
<OmniParserModel Include="Models\icon_detect.onnx" />
334-
</ItemGroup>
335-
<Copy SourceFiles="@(OmniParserModel)" DestinationFolder="$(OutputPath)\models" SkipUnchangedFiles="true" />
336-
</Target>
337334
<Import Project="..\packages\CefSharp.Common.135.0.170\build\CefSharp.Common.targets" Condition="Exists('..\packages\CefSharp.Common.135.0.170\build\CefSharp.Common.targets')" />
338335
<Import Project="..\packages\Tesseract.5.2.0\build\Tesseract.targets" Condition="Exists('..\packages\Tesseract.5.2.0\build\Tesseract.targets')" />
339336
<Import Project="..\packages\Fody.6.9.2\build\Fody.targets" Condition="Exists('..\packages\Fody.6.9.2\build\Fody.targets')" />

FlowVision/OmniParserForm.Designer.cs

Lines changed: 33 additions & 28 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

FlowVision/OmniParserForm.cs

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,19 @@
11
using System;
2-
using System.Collections.Generic;
3-
using System.ComponentModel;
4-
using System.Data;
5-
using System.Drawing;
6-
using System.Linq;
7-
using System.Text;
8-
using System.Threading.Tasks;
92
using System.Windows.Forms;
103
using FlowVision.lib.Classes;
114

125
namespace FlowVision
136
{
147
public partial class OmniParserForm : Form
158
{
16-
private OmniParserConfig _config;
17-
189
public OmniParserForm()
1910
{
2011
InitializeComponent();
21-
_config = OmniParserConfig.LoadConfig();
22-
}
23-
24-
private void saveButton_Click(object sender, EventArgs e)
25-
{
26-
string url = omniParserServerURL.Text.Trim();
27-
if (string.IsNullOrEmpty(url))
28-
{
29-
MessageBox.Show("Please enter a valid URL.");
30-
return;
31-
}
32-
33-
// Save the URL to the config file
34-
_config.ServerURL = url;
35-
_config.SaveConfig();
36-
37-
// Optionally, you can close the form after saving
38-
this.Close();
3912
}
4013

4114
private void OmniParserForm_Load(object sender, EventArgs e)
4215
{
43-
// Load the URL from the config file
44-
omniParserServerURL.Text = _config.ServerURL;
16+
// Form is now just an informational status display
4517
}
4618
}
4719
}

FlowVision/Program.cs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,20 @@ static class Program
1414
[STAThread]
1515
static void Main()
1616
{
17+
// Preload the ONNX model on a background thread so it's ready when needed
18+
Task.Run(() =>
19+
{
20+
try
21+
{
22+
// Accessing the Instance property triggers the model loading
23+
var parser = FlowVision.lib.Classes.SimpleOmniParser.Instance;
24+
}
25+
catch
26+
{
27+
// Ignore startup errors - they will be caught/logged when the user actually tries to use it
28+
}
29+
});
30+
1731
Application.EnableVisualStyles();
1832
Application.SetCompatibleTextRenderingDefault(false);
1933
Application.Run(new Form1());

FlowVision/lib/Classes/LMStudioConfig.cs

Lines changed: 34 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -4,93 +4,76 @@
44

55
namespace FlowVision.lib.Classes
66
{
7-
/// <summary>
8-
/// Configuration for LM Studio local AI integration
9-
/// </summary>
107
public class LMStudioConfig
118
{
12-
/// <summary>
13-
/// LM Studio server endpoint (default: http://localhost:1234/v1)
14-
/// </summary>
159
public string EndpointURL { get; set; } = "http://localhost:1234/v1";
16-
17-
/// <summary>
18-
/// Model name (e.g., "gpt-3.5-turbo", "local-model", or whatever LM Studio shows)
19-
/// Can be left as default - LM Studio typically uses the loaded model automatically
20-
/// </summary>
2110
public string ModelName { get; set; } = "local-model";
22-
23-
/// <summary>
24-
/// API key (LM Studio doesn't require one, but field kept for compatibility)
25-
/// Use "lm-studio" or "not-needed" as placeholder
26-
/// </summary>
27-
public string APIKey { get; set; } = "lm-studio";
28-
29-
/// <summary>
30-
/// Whether to use LM Studio or fall back to Azure
31-
/// </summary>
32-
public bool Enabled { get; set; } = false;
33-
34-
/// <summary>
35-
/// Temperature setting for local model
36-
/// </summary>
3711
public double Temperature { get; set; } = 0.7;
38-
39-
/// <summary>
40-
/// Max tokens for completion
41-
/// </summary>
4212
public int MaxTokens { get; set; } = 2048;
43-
44-
/// <summary>
45-
/// Timeout in seconds for LM Studio requests
46-
/// </summary>
47-
public int TimeoutSeconds { get; set; } = 300;
13+
public bool Enabled { get; set; } = false;
14+
public string APIKey { get; set; } = "lm-studio"; // OpenAI client requires a key even if local
15+
public int TimeoutSeconds { get; set; } = 120;
4816

49-
private static string ConfigFilePath()
50-
{
51-
return Path.Combine(
52-
Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData),
53-
"FlowVision",
54-
"lmstudioconfig.json");
55-
}
17+
// False when the config file was empty or loading threw an exception; true otherwise (including defaults when no file exists)
18+
[System.Text.Json.Serialization.JsonIgnore]
19+
public bool IsValid { get; set; } = true;
20+
21+
private static string ConfigFilePath => Path.Combine(
22+
Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData),
23+
"FlowVision",
24+
"lmstudioconfig.json");
5625

5726
public static LMStudioConfig LoadConfig()
5827
{
5928
try
6029
{
61-
Directory.CreateDirectory(Path.GetDirectoryName(ConfigFilePath()));
30+
Directory.CreateDirectory(Path.GetDirectoryName(ConfigFilePath));
6231

63-
if (File.Exists(ConfigFilePath()))
32+
if (File.Exists(ConfigFilePath))
6433
{
65-
string jsonContent = File.ReadAllText(ConfigFilePath());
66-
if (!string.IsNullOrWhiteSpace(jsonContent))
34+
string jsonContent = File.ReadAllText(ConfigFilePath);
35+
// Basic validation for empty file
36+
if (string.IsNullOrWhiteSpace(jsonContent))
6737
{
68-
var config = JsonSerializer.Deserialize<LMStudioConfig>(jsonContent);
69-
return config ?? new LMStudioConfig();
38+
return new LMStudioConfig { IsValid = false };
39+
}
40+
41+
var config = JsonSerializer.Deserialize<LMStudioConfig>(jsonContent);
42+
if (config != null)
43+
{
44+
config.IsValid = true;
45+
return config;
7046
}
7147
}
7248
}
7349
catch (Exception ex)
7450
{
75-
Console.WriteLine($"Error loading LM Studio config: {ex.Message}");
51+
// Return a config marked as invalid so the UI can warn the user
52+
return new LMStudioConfig
53+
{
54+
Enabled = false,
55+
IsValid = false
56+
};
7657
}
7758

59+
// Return defaults if the file doesn't exist or its content deserialized to null
7860
return new LMStudioConfig();
7961
}
8062

8163
public void SaveConfig()
8264
{
8365
try
8466
{
85-
Directory.CreateDirectory(Path.GetDirectoryName(ConfigFilePath()));
67+
Directory.CreateDirectory(Path.GetDirectoryName(ConfigFilePath));
8668

8769
var options = new JsonSerializerOptions { WriteIndented = true };
8870
string jsonContent = JsonSerializer.Serialize(this, options);
89-
File.WriteAllText(ConfigFilePath(), jsonContent);
71+
File.WriteAllText(ConfigFilePath, jsonContent);
9072
}
9173
catch (Exception ex)
9274
{
9375
Console.WriteLine($"Error saving LM Studio config: {ex.Message}");
76+
throw; // Re-throw so the UI can show the error
9477
}
9578
}
9679
}

FlowVision/lib/Classes/OcrHelper.cs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,54 @@ private static void Initialize()
8282
}
8383
}
8484

85+
/// <summary>
86+
/// Search for specific text in an image and return its bounding box.
87+
/// Each recognized word is compared to searchText as a whole, ignoring case; the first match is returned, or null if none is found.
88+
/// </summary>
89+
public static async Task<Rectangle?> FindTextLocationAsync(Bitmap image, string searchText)
90+
{
91+
if (!_isAvailable || _engine == null || string.IsNullOrWhiteSpace(searchText))
92+
return null;
93+
94+
return await Task.Run(() =>
95+
{
96+
try
97+
{
98+
lock (_lock)
99+
{
100+
using (var pix = PixConverter.ToPix(image))
101+
using (var page = _engine.Process(pix))
102+
using (var iter = page.GetIterator())
103+
{
104+
iter.Begin();
105+
do
106+
{
107+
// Get text at current iterator level (Word)
108+
string currentText = iter.GetText(PageIteratorLevel.Word)?.Trim();
109+
110+
// Simple case-insensitive match
111+
if (!string.IsNullOrWhiteSpace(currentText) &&
112+
currentText.Equals(searchText, StringComparison.OrdinalIgnoreCase))
113+
{
114+
// Found a whole-word, case-insensitive match; get its bounding box.
115+
if (iter.TryGetBoundingBox(PageIteratorLevel.Word, out var rect))
116+
{
117+
return (Rectangle?)new Rectangle(rect.X1, rect.Y1, rect.Width, rect.Height);
118+
}
119+
}
120+
} while (iter.Next(PageIteratorLevel.Word));
121+
}
122+
}
123+
return (Rectangle?)null;
124+
}
125+
catch (Exception ex)
126+
{
127+
PluginLogger.LogError("OcrHelper", "FindTextLocationAsync", $"OCR search failed: {ex.Message}");
128+
return (Rectangle?)null;
129+
}
130+
});
131+
}
132+
85133
/// <summary>
86134
/// Extract text from a bitmap image
87135
/// </summary>

0 commit comments

Comments
 (0)