Skip to content

Commit 9a8b5b9

Browse files
committed
v0.5.0 - Supertonic + Whisper support, UI Scale mode for 8k/4k displays
1 parent 95580c1 commit 9a8b5b9

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

48 files changed

+2542
-124
lines changed

DiffuseApp/DiffuseApp.Common/DiffuseApp.Common.csproj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
<ItemGroup>
1616
<PackageReference Include="CommandLineParser" Version="2.9.1" />
1717
<PackageReference Include="CommunityToolkit.HighPerformance" Version="8.4.0" />
18-
<PackageReference Include="Microsoft.Extensions.Hosting" Version="9.0.7" />
19-
<PackageReference Include="ZstdSharp.Port" Version="0.8.6" />
18+
<PackageReference Include="Microsoft.Extensions.Hosting" Version="10.0.2" />
19+
<PackageReference Include="ZstdSharp.Port" Version="0.8.7" />
2020
</ItemGroup>
2121

2222
<!--Binaries-->

DiffuseApp/DiffuseApp.Common/PipelineServer.cs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ public sealed class PipelineServer : IDisposable
2323
private readonly NamedPipeServerStream _progressChannel;
2424
private readonly Channel<PipelineProgress> _progressQueue;
2525
private readonly IProgress<PipelineProgress> _progressCallback;
26-
private readonly CancellationTokenSource _serverCancellation;
2726
private CancellationTokenSource _pipelineCancellation;
2827

2928
/// <summary>
@@ -245,7 +244,6 @@ private async Task StartServerAsync(PipelineRequest request, CancellationToken c
245244
/// <param name="cancellationToken">The cancellation token.</param>
246245
private async Task StopServerAsync(PipelineRequest request, CancellationToken cancellationToken)
247246
{
248-
_serverCancellation?.SafeCancel();
249247
await _pipelineChannel.SendResponse(cancellationToken);
250248
_logger.LogInformation($"[PipelineServer] [StopServer] Server stopped.");
251249
}
@@ -430,8 +428,6 @@ public void Dispose()
430428
{
431429
_pipelineCancellation?.SafeCancel();
432430
_pipelineCancellation?.Dispose();
433-
_serverCancellation?.SafeCancel();
434-
_serverCancellation?.Dispose();
435431
_progressChannel?.Dispose();
436432
_commandChannel?.Dispose();
437433
_pipelineChannel?.Dispose();

DiffuseApp/DiffuseApp.Server/DiffuseApp.Server.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
<PackageReference Include="Serilog.Extensions.Logging" Version="10.0.0" />
2626
<PackageReference Include="Serilog.Sinks.File" Version="7.0.0" />
2727
<PackageReference Include="CommunityToolkit.HighPerformance" Version="8.4.0" />
28-
<PackageReference Include="ZstdSharp.Port" Version="0.8.6" />
28+
<PackageReference Include="ZstdSharp.Port" Version="0.8.7" />
2929
</ItemGroup>
3030

3131
<!--Binaries-->

DiffuseApp/DiffuseApp/App.xaml

Lines changed: 158 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
xmlns:CommonControls="clr-namespace:TensorStack.WPF.Controls;assembly=TensorStack.WPF"
88
xmlns:PythonOptions="clr-namespace:TensorStack.Python.Common;assembly=TensorStack.Python"
99
xmlns:Extractors="clr-namespace:TensorStack.Extractors.Common;assembly=TensorStack.Extractors"
10+
xmlns:Whisper="clr-namespace:TensorStack.TextGeneration.Pipelines.Whisper;assembly=TensorStack.TextGeneration"
11+
xmlns:TextGeneration="clr-namespace:TensorStack.TextGeneration.Common;assembly=TensorStack.TextGeneration"
1012
xmlns:Common="clr-namespace:Diffuse.Common"
1113
ShutdownMode="OnMainWindowClose">
1214
<Application.Resources>
@@ -178,6 +180,24 @@
178180
</ObjectDataProvider.MethodParameters>
179181
</ObjectDataProvider>
180182

183+
<ObjectDataProvider x:Key="TaskType" MethodName="GetValues" ObjectType="{x:Type System:Enum}">
184+
<ObjectDataProvider.MethodParameters>
185+
<x:Type TypeName="Whisper:TaskType"/>
186+
</ObjectDataProvider.MethodParameters>
187+
</ObjectDataProvider>
188+
189+
<ObjectDataProvider x:Key="LanguageType" MethodName="GetValues" ObjectType="{x:Type System:Enum}">
190+
<ObjectDataProvider.MethodParameters>
191+
<x:Type TypeName="Whisper:LanguageType"/>
192+
</ObjectDataProvider.MethodParameters>
193+
</ObjectDataProvider>
194+
195+
196+
<ObjectDataProvider x:Key="EarlyStopping" MethodName="GetValues" ObjectType="{x:Type System:Enum}">
197+
<ObjectDataProvider.MethodParameters>
198+
<x:Type TypeName="TextGeneration:EarlyStopping"/>
199+
</ObjectDataProvider.MethodParameters>
200+
</ObjectDataProvider>
181201

182202
<Style x:Key="ImageDropZoneStyle" TargetType="{x:Type Border}">
183203
<Setter Property="AllowDrop" Value="False"/>
@@ -240,6 +260,46 @@
240260
</Style>
241261

242262

263+
<Style x:Key="AudioDropZoneStyle" TargetType="{x:Type Border}">
264+
<Setter Property="AllowDrop" Value="False"/>
265+
<Setter Property="BorderBrush" Value="Transparent"/>
266+
<Setter Property="BorderThickness" Value="1"/>
267+
<Style.Triggers>
268+
<MultiDataTrigger>
269+
<MultiDataTrigger.Conditions>
270+
<Condition Binding="{Binding IsEnabled, RelativeSource={RelativeSource Self}}" Value="True" />
271+
<Condition Binding="{Binding IsDragDrop, RelativeSource={RelativeSource AncestorType=CommonControls:ViewControl}}" Value="True" />
272+
<Condition Binding="{Binding DragDropType, RelativeSource={RelativeSource AncestorType=CommonControls:ViewControl}}" Value="Audio" />
273+
</MultiDataTrigger.Conditions>
274+
<MultiDataTrigger.Setters>
275+
<Setter Property="AllowDrop" Value="True"/>
276+
<Setter Property="BorderBrush" Value="{StaticResource AccentColour2}"/>
277+
</MultiDataTrigger.Setters>
278+
</MultiDataTrigger>
279+
</Style.Triggers>
280+
</Style>
281+
282+
283+
<Style x:Key="TextDropZoneStyle" TargetType="{x:Type Border}">
284+
<Setter Property="AllowDrop" Value="False"/>
285+
<Setter Property="BorderBrush" Value="Transparent"/>
286+
<Setter Property="BorderThickness" Value="1"/>
287+
<Style.Triggers>
288+
<MultiDataTrigger>
289+
<MultiDataTrigger.Conditions>
290+
<Condition Binding="{Binding IsEnabled, RelativeSource={RelativeSource Self}}" Value="True" />
291+
<Condition Binding="{Binding IsDragDrop, RelativeSource={RelativeSource AncestorType=CommonControls:ViewControl}}" Value="True" />
292+
<Condition Binding="{Binding DragDropType, RelativeSource={RelativeSource AncestorType=CommonControls:ViewControl}}" Value="Text" />
293+
</MultiDataTrigger.Conditions>
294+
<MultiDataTrigger.Setters>
295+
<Setter Property="AllowDrop" Value="True"/>
296+
<Setter Property="BorderBrush" Value="{StaticResource AccentColour2}"/>
297+
</MultiDataTrigger.Setters>
298+
</MultiDataTrigger>
299+
</Style.Triggers>
300+
</Style>
301+
302+
243303
<Style x:Key="ToggleButtonBasic" TargetType="{x:Type ToggleButton}">
244304
<Setter Property="FocusVisualStyle" Value="{StaticResource FocusVisual}"/>
245305
<Setter Property="Background" Value="{StaticResource ComboBoxBackground}"/>
@@ -308,7 +368,7 @@
308368
</Style>
309369

310370
<Style x:Key="MenuIconButton" TargetType="{x:Type CommonControls:IconButton}" BasedOn="{StaticResource {x:Type CommonControls:IconButton}}">
311-
<Setter Property="Width" Value="300"/>
371+
<Setter Property="Width" Value="250"/>
312372
<Setter Property="Orientation" Value="Horizontal"/>
313373
<Setter Property="Placement" Value="Bottom"/>
314374
<Setter Property="FontSize" Value="14"/>
@@ -822,7 +882,104 @@
822882
</DataTemplate>
823883

824884

885+
<DataTemplate DataType="{x:Type Common:AudioHistory}">
886+
<DockPanel Margin="10,0,0,0">
887+
<TextBlock DockPanel.Dock="Top" Text="{Binding MediaType, StringFormat={}Generated {0}}" FontSize="20"/>
888+
<StackPanel>
889+
<Separator Opacity=".7"/>
890+
<UniformGrid Columns="3" Margin="0,6,0,0">
891+
<StackPanel>
892+
<TextBlock Text="Voice Style" Style="{StaticResource FieldTextBlockStyle}" />
893+
<TextBlock>
894+
<Run Text="{Binding Options.VoiceStyle}" />
895+
</TextBlock>
896+
</StackPanel>
897+
<StackPanel>
898+
<TextBlock Text="Speed" Style="{StaticResource FieldTextBlockStyle}" />
899+
<TextBlock>
900+
<Run Text="{Binding Options.Speed}" />
901+
<Run Text="x" />
902+
</TextBlock>
903+
</StackPanel>
904+
<StackPanel>
905+
<TextBlock Text="Duration" Style="{StaticResource FieldTextBlockStyle}" />
906+
<TextBlock>
907+
<Run Text="{Binding Duration, FallbackValue=00:00:00, StringFormat={}mm\\:ss\\.ff}" />
908+
<Run Text="ms" />
909+
</TextBlock>
910+
</StackPanel>
911+
<StackPanel>
912+
<TextBlock Text="Seed" Style="{StaticResource FieldTextBlockStyle}" />
913+
<TextBlock>
914+
<Run Text="{Binding Options.Seed}" />
915+
</TextBlock>
916+
</StackPanel>
917+
<StackPanel>
918+
<TextBlock Text="Steps" Style="{StaticResource FieldTextBlockStyle}" />
919+
<TextBlock>
920+
<Run Text="{Binding Options.Steps}" />
921+
</TextBlock>
922+
</StackPanel>
923+
<StackPanel>
924+
<TextBlock Text="Sample Rate" Style="{StaticResource FieldTextBlockStyle}" />
925+
<TextBlock>
926+
<Run Text="{Binding SampleRate}" />
927+
<Run Text="hz" />
928+
</TextBlock>
929+
</StackPanel>
930+
</UniformGrid>
931+
932+
<StackPanel Margin="0,6,0,0" >
933+
<TextBlock Text="Created" Style="{StaticResource FieldTextBlockStyle}" />
934+
<TextBlock Text="{Binding Timestamp}" />
935+
</StackPanel>
936+
<UniformGrid Columns="2" Margin="0,6,0,0">
937+
<StackPanel>
938+
<TextBlock Text="{Binding MediaType, StringFormat={}{0} Path}" Style="{StaticResource FieldTextBlockStyle}" />
939+
<CommonControls:HyperlinkControl Label="{Binding MediaPath, Converter={StaticResource FullPathToFileNameConverter}}" Link="{Binding MediaPath, Converter={StaticResource FullPathToFolderConverter}}" IsUnderlineEnabled="False" />
940+
</StackPanel>
941+
<StackPanel>
942+
<TextBlock Text="Info Path" Style="{StaticResource FieldTextBlockStyle}" />
943+
<CommonControls:HyperlinkControl Label="{Binding FilePath, Converter={StaticResource FullPathToFileNameConverter}}" Link="{Binding FilePath}" IsUnderlineEnabled="False" />
944+
</StackPanel>
945+
</UniformGrid>
946+
</StackPanel>
947+
</DockPanel>
948+
</DataTemplate>
949+
950+
951+
<DataTemplate DataType="{x:Type Common:TextHistory}">
952+
<DockPanel Margin="10,0,0,0">
953+
<TextBlock DockPanel.Dock="Top" Text="{Binding MediaType, StringFormat={}Generated {0}}" FontSize="20"/>
954+
<StackPanel>
955+
<Separator Opacity=".7"/>
956+
<UniformGrid Columns="3" Margin="0,6,0,0">
957+
<StackPanel>
958+
<TextBlock Text="Length" Style="{StaticResource FieldTextBlockStyle}" />
959+
<TextBlock>
960+
<Run Text="{Binding InputLength}" />
961+
</TextBlock>
962+
</StackPanel>
963+
964+
</UniformGrid>
825965

966+
<StackPanel Margin="0,6,0,0" >
967+
<TextBlock Text="Created" Style="{StaticResource FieldTextBlockStyle}" />
968+
<TextBlock Text="{Binding Timestamp}" />
969+
</StackPanel>
970+
<UniformGrid Columns="2" Margin="0,6,0,0">
971+
<StackPanel>
972+
<TextBlock Text="{Binding MediaType, StringFormat={}{0} Path}" Style="{StaticResource FieldTextBlockStyle}" />
973+
<CommonControls:HyperlinkControl Label="{Binding MediaPath, Converter={StaticResource FullPathToFileNameConverter}}" Link="{Binding MediaPath, Converter={StaticResource FullPathToFolderConverter}}" IsUnderlineEnabled="False" />
974+
</StackPanel>
975+
<StackPanel>
976+
<TextBlock Text="Info Path" Style="{StaticResource FieldTextBlockStyle}" />
977+
<CommonControls:HyperlinkControl Label="{Binding FilePath, Converter={StaticResource FullPathToFileNameConverter}}" Link="{Binding FilePath}" IsUnderlineEnabled="False" />
978+
</StackPanel>
979+
</UniformGrid>
980+
</StackPanel>
981+
</DockPanel>
982+
</DataTemplate>
826983

827984

828985
<Style x:Key="SettingsListBoxItem" TargetType="{x:Type ListBoxItem}">

DiffuseApp/DiffuseApp/App.xaml.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ public App()
7171
builder.Services.AddSingleton<IDiffusionService, DiffusionService>();
7272
builder.Services.AddSingleton<IEnvironmentService, EnvironmentService>();
7373
builder.Services.AddSingleton<IInterpolationService, InterpolationService>();
74+
builder.Services.AddSingleton<IAudioService, AudioService>();
7475

7576
// Build
7677
_appHost = builder.Build();
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
using Diffuse.Views;
2+
using System;
3+
using System.Runtime.CompilerServices;
4+
using System.Text.Json.Serialization;
5+
6+
namespace Diffuse.Common
7+
{
8+
public record AudioHistory : IHistoryItem
9+
{
10+
public int Version { get; init; }
11+
public string Id { get; init; }
12+
public View Source { get; init; }
13+
public MediaType MediaType { get; init; }
14+
public DateTime Timestamp { get; init; }
15+
public string Extension { get; init; }
16+
public string Model { get; init; }
17+
18+
19+
20+
public int Channels { get; init; }
21+
public int SampleRate { get; init; }
22+
public TimeSpan Duration { get; init; }
23+
24+
25+
public AudioInputOptions Options { get; init; }
26+
27+
[JsonIgnore]
28+
public string FilePath { get; set; }
29+
30+
[JsonIgnore]
31+
public string MediaPath { get; set; }
32+
33+
[JsonIgnore]
34+
public string ThumbPath { get; set; }
35+
36+
37+
38+
39+
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
40+
public int Width { get; init; }
41+
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
42+
public int Height { get; init; }
43+
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
44+
public float FrameRate { get; init; }
45+
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
46+
public int FrameCount { get; init; }
47+
/// <summary>
/// Reference-equality comparison for <see cref="AudioHistory"/> items.
/// Declaring <c>Equals(AudioHistory)</c> explicitly replaces the value-based
/// equality a record would otherwise synthesize, so two items are equal only
/// when they are the same instance. This keeps Equals consistent with the
/// identity-based <see cref="GetHashCode"/> override on this record.
/// </summary>
/// <param name="other">The item to compare with; may be null.</param>
/// <returns><c>true</c> only when <paramref name="other"/> is this exact instance.</returns>
public virtual bool Equals(AudioHistory other) => ReferenceEquals(this, other);
48+
/// <summary>
/// Returns a hash code derived from this instance via
/// <see cref="RuntimeHelpers.GetHashCode(object)"/>, not from the record's property values.
/// </summary>
public override int GetHashCode() => RuntimeHelpers.GetHashCode(this);
49+
}
50+
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
using TensorStack.TextGeneration.Common;
2+
using TensorStack.TextGeneration.Pipelines.Whisper;
3+
using TensorStack.WPF;
4+
5+
namespace Diffuse.Common
6+
{
7+
public class AudioInputOptions : BaseModel
8+
{
9+
public int Seed { get; set; }
10+
11+
//Supertonic
12+
public string VoiceStyle { get; set; } = "Female1";
13+
public int Steps { get; set; } = 10;
14+
public float Speed { get; set; } = 1.1f;
15+
public float SilenceDuration { get; set; } = 0.3f;
16+
17+
18+
//Whisper
19+
public LanguageType Language { get; set; } = LanguageType.EN;
20+
public TaskType Task { get; set; } = TaskType.Transcribe;
21+
public int MinLength { get; set; } = 20;
22+
public int MaxLength { get; set; } = 512;
23+
public int NoRepeatNgramSize { get; set; } = 3;
24+
public int Beams { get; set; } = 0;
25+
public int TopK { get; set; } = 50;
26+
public float TopP { get; set; } = 0.95f;
27+
public float Temperature { get; set; } = 1.0f;
28+
public float LengthPenalty { get; set; } = 1.0f;
29+
public EarlyStopping EarlyStopping { get; set; } = EarlyStopping.BestBeam;
30+
public int DiversityLength { get; set; } = 1;
31+
public int ChunkSize { get; set; } = 5;
32+
}
33+
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
using System.IO;
2+
using System.Linq;
3+
using System.Text.Json.Serialization;
4+
using System.Threading.Tasks;
5+
using TensorStack.Common.Common;
6+
using TensorStack.WPF;
7+
using TensorStack.WPF.Services;
8+
9+
namespace Diffuse.Common
10+
{
11+
public class AudioModel : BaseModel
12+
{
13+
private bool _isValid;
14+
15+
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
16+
public int Id { get; set; }
17+
public string Name { get; set; }
18+
public bool IsDefault { get; set; }
19+
public AudioModelType Type { get; set; }
20+
public string Version { get; set; }
21+
public int MinLength { get; set; }
22+
public int MaxLength { get; set; }
23+
public string[] Prefixes { get; set; }
24+
public string[] UrlPaths { get; set; }
25+
26+
[JsonIgnore]
27+
public string Path { get; set; }
28+
29+
[JsonIgnore]
30+
public bool IsValid
31+
{
32+
get { return _isValid; }
33+
private set { SetProperty(ref _isValid, value); }
34+
}
35+
36+
37+
public void Initialize(string modelDirectory)
38+
{
39+
var directory = System.IO.Path.Combine(modelDirectory, Name);
40+
var modelFiles = FileHelper.GetUrlFileMapping(UrlPaths, directory);
41+
if (modelFiles.Values.All(File.Exists))
42+
{
43+
IsValid = true;
44+
Path = directory;
45+
}
46+
}
47+
48+
49+
public async Task<bool> DownloadAsync(string modelDirectory)
50+
{
51+
var directory = System.IO.Path.Combine(modelDirectory, Name);
52+
if (await DialogService.DownloadAsync($"Download '{Name}' model?", UrlPaths, directory))
53+
Initialize(modelDirectory);
54+
55+
return IsValid;
56+
}
57+
}
58+
59+
60+
public enum AudioModelType
61+
{
62+
Whisper = 0,
63+
Supertonic = 10
64+
}
65+
}

0 commit comments

Comments
 (0)