Skip to content

Commit 11df68b

Browse files
committed
* Fixed namespace issue with OnixTextContent.
* Added extension methods that use OpenAI GPT4 for repairing HTML text.
1 parent 08ba338 commit 11df68b

8 files changed

Lines changed: 245 additions & 2 deletions

File tree

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
using System;
2+
3+
using OnixData.Legacy;
4+
using OnixData.Services;
5+
using OnixData.Version3.Text;
6+
7+
namespace OnixData.Extensions
8+
{
9+
/// <summary>
/// Extension methods that send the text of ONIX text elements to the chat-based
/// repair service (<see cref="OnixTextRepairChatService"/>) and store the repaired
/// result back on the element.
/// </summary>
public static class OnixRepairTextExtensions
{
    /// <summary>
    /// Repairs any malformed HTML in the Text field of an OnixLegacyOtherText object.
    /// The Text property is overwritten with the repaired value; nothing is sent to the
    /// service when Text is null or empty.
    /// </summary>
    /// <param name="legacyOtherText">The legacy (ONIX 2.x) text element whose Text is repaired in place.</param>
    /// <param name="chatServiceSettings">Settings used to construct the repair service; only required when there is text to repair.</param>
    /// <returns>The duration reported by the repair service, or <see cref="TimeSpan.Zero"/> when no request was made.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="legacyOtherText"/> is null, or <paramref name="chatServiceSettings"/> is null while there is text to repair.
    /// </exception>
    public static TimeSpan RepairHtmlText(this OnixLegacyOtherText legacyOtherText, OnixChatServiceSettings chatServiceSettings)
    {
        if (legacyOtherText == null)
        {
            throw new ArgumentNullException(nameof(legacyOtherText));
        }

        TimeSpan elapsed = TimeSpan.Zero;

        if (!String.IsNullOrEmpty(legacyOtherText.Text))
        {
            legacyOtherText.Text = RepairText(legacyOtherText.Text, chatServiceSettings, ref elapsed);
        }

        return elapsed;
    }

    /// <summary>
    /// Repairs any malformed HTML in the Text field of an OnixTextContent object.
    /// The Text property is overwritten with the repaired value; nothing is sent to the
    /// service when Text is null or empty.
    /// </summary>
    /// <param name="onixTextContent">The ONIX 3.x text content element whose Text is repaired in place.</param>
    /// <param name="chatServiceSettings">Settings used to construct the repair service; only required when there is text to repair.</param>
    /// <returns>The duration reported by the repair service, or <see cref="TimeSpan.Zero"/> when no request was made.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="onixTextContent"/> is null, or <paramref name="chatServiceSettings"/> is null while there is text to repair.
    /// </exception>
    public static TimeSpan RepairHtmlText(this OnixTextContent onixTextContent, OnixChatServiceSettings chatServiceSettings)
    {
        if (onixTextContent == null)
        {
            throw new ArgumentNullException(nameof(onixTextContent));
        }

        TimeSpan elapsed = TimeSpan.Zero;

        if (!String.IsNullOrEmpty(onixTextContent.Text))
        {
            onixTextContent.Text = RepairText(onixTextContent.Text, chatServiceSettings, ref elapsed);
        }

        return elapsed;
    }

    /// <summary>
    /// Shared implementation for both overloads: builds a repair service from the
    /// settings and returns the repaired text, reporting the request duration via
    /// <paramref name="elapsed"/>.
    /// </summary>
    private static string RepairText(string text, OnixChatServiceSettings chatServiceSettings, ref TimeSpan elapsed)
    {
        // Checked here (not at the public entry points) so that callers passing null
        // settings together with empty text keep getting TimeSpan.Zero, as before.
        if (chatServiceSettings == null)
        {
            throw new ArgumentNullException(nameof(chatServiceSettings));
        }

        OnixTextRepairChatService repairService = new OnixTextRepairChatService(chatServiceSettings);

        return repairService.RetrieveRepairedCommHtml(text, ref elapsed);
    }
}
49+
}

OnixData/OnixData.csproj

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,54 @@
3131
<WarningLevel>4</WarningLevel>
3232
</PropertyGroup>
3333
<ItemGroup>
34+
<Reference Include="Microsoft.Bcl.AsyncInterfaces, Version=8.0.0.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
35+
<HintPath>..\packages\Microsoft.Bcl.AsyncInterfaces.8.0.0\lib\net462\Microsoft.Bcl.AsyncInterfaces.dll</HintPath>
36+
</Reference>
37+
<Reference Include="Microsoft.Extensions.DependencyInjection.Abstractions, Version=8.0.0.2, Culture=neutral, PublicKeyToken=adb9793829ddae60, processorArchitecture=MSIL">
38+
<HintPath>..\packages\Microsoft.Extensions.DependencyInjection.Abstractions.8.0.2\lib\net462\Microsoft.Extensions.DependencyInjection.Abstractions.dll</HintPath>
39+
</Reference>
40+
<Reference Include="Microsoft.Extensions.Logging.Abstractions, Version=8.0.0.3, Culture=neutral, PublicKeyToken=adb9793829ddae60, processorArchitecture=MSIL">
41+
<HintPath>..\packages\Microsoft.Extensions.Logging.Abstractions.8.0.3\lib\net462\Microsoft.Extensions.Logging.Abstractions.dll</HintPath>
42+
</Reference>
43+
<Reference Include="OpenAI, Version=2.5.0.0, Culture=neutral, PublicKeyToken=b4187f3e65366280, processorArchitecture=MSIL">
44+
<HintPath>..\packages\OpenAI.2.5.0\lib\netstandard2.0\OpenAI.dll</HintPath>
45+
</Reference>
3446
<Reference Include="System" />
47+
<Reference Include="System.Buffers, Version=4.0.3.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
48+
<HintPath>..\packages\System.Buffers.4.5.1\lib\net461\System.Buffers.dll</HintPath>
49+
</Reference>
50+
<Reference Include="System.ClientModel, Version=1.6.1.0, Culture=neutral, PublicKeyToken=92742159e12e44c8, processorArchitecture=MSIL">
51+
<HintPath>..\packages\System.ClientModel.1.6.1\lib\netstandard2.0\System.ClientModel.dll</HintPath>
52+
</Reference>
3553
<Reference Include="System.Core" />
54+
<Reference Include="System.Diagnostics.DiagnosticSource, Version=8.0.0.1, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
55+
<HintPath>..\packages\System.Diagnostics.DiagnosticSource.8.0.1\lib\net462\System.Diagnostics.DiagnosticSource.dll</HintPath>
56+
</Reference>
57+
<Reference Include="System.Memory, Version=4.0.1.2, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
58+
<HintPath>..\packages\System.Memory.4.5.5\lib\net461\System.Memory.dll</HintPath>
59+
</Reference>
60+
<Reference Include="System.Memory.Data, Version=8.0.0.1, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
61+
<HintPath>..\packages\System.Memory.Data.8.0.1\lib\net462\System.Memory.Data.dll</HintPath>
62+
</Reference>
63+
<Reference Include="System.Numerics" />
64+
<Reference Include="System.Numerics.Vectors, Version=4.1.4.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
65+
<HintPath>..\packages\System.Numerics.Vectors.4.5.0\lib\net46\System.Numerics.Vectors.dll</HintPath>
66+
</Reference>
67+
<Reference Include="System.Runtime.CompilerServices.Unsafe, Version=6.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
68+
<HintPath>..\packages\System.Runtime.CompilerServices.Unsafe.6.0.0\lib\net461\System.Runtime.CompilerServices.Unsafe.dll</HintPath>
69+
</Reference>
70+
<Reference Include="System.Text.Encodings.Web, Version=8.0.0.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
71+
<HintPath>..\packages\System.Text.Encodings.Web.8.0.0\lib\net462\System.Text.Encodings.Web.dll</HintPath>
72+
</Reference>
73+
<Reference Include="System.Text.Json, Version=8.0.0.6, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
74+
<HintPath>..\packages\System.Text.Json.8.0.6\lib\net462\System.Text.Json.dll</HintPath>
75+
</Reference>
76+
<Reference Include="System.Threading.Tasks.Extensions, Version=4.2.0.1, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
77+
<HintPath>..\packages\System.Threading.Tasks.Extensions.4.5.4\lib\net461\System.Threading.Tasks.Extensions.dll</HintPath>
78+
</Reference>
79+
<Reference Include="System.ValueTuple, Version=4.0.3.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
80+
<HintPath>..\packages\System.ValueTuple.4.5.0\lib\net461\System.ValueTuple.dll</HintPath>
81+
</Reference>
3682
<Reference Include="System.Xml.Linq" />
3783
<Reference Include="System.Data.DataSetExtensions" />
3884
<Reference Include="Microsoft.CSharp" />
@@ -43,6 +89,7 @@
4389
<ItemGroup>
4490
<Compile Include="Extensions\OnixDataExtensions.cs" />
4591
<Compile Include="Extensions\OnixParserExtensions.cs" />
92+
<Compile Include="Extensions\OnixRepairTextExtensions.cs" />
4693
<Compile Include="Extensions\OnixReplaceExtensions.cs" />
4794
<Compile Include="Extensions\OnixXmlExtensions.cs" />
4895
<Compile Include="Legacy\OnixLegacyAudience.cs" />
@@ -80,6 +127,8 @@
80127
<Compile Include="OnixPlusParser.cs" />
81128
<Compile Include="OnixXmlTextReader.cs" />
82129
<Compile Include="Properties\AssemblyInfo.cs" />
130+
<Compile Include="Services\OnixChatServiceSettings.cs" />
131+
<Compile Include="Services\OnixTextRepairChatService.cs" />
83132
<Compile Include="Version3\Content\OnixContentItem.cs" />
84133
<Compile Include="Version3\Epub\OnixEpubUsageConstraint.cs" />
85134
<Compile Include="Version3\Epub\OnixEpubUsageLimit.cs" />
@@ -139,7 +188,10 @@
139188
<Compile Include="Version3\Title\OnixTitleDetail.cs" />
140189
<Compile Include="Version3\Title\OnixTitleElement.cs" />
141190
</ItemGroup>
142-
<ItemGroup />
191+
<ItemGroup>
192+
<None Include="app.config" />
193+
<None Include="packages.config" />
194+
</ItemGroup>
143195
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
144196
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
145197
Other similar extension points exist, see Microsoft.Common.targets.

OnixData/OnixParser.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
using OnixData.Extensions;
1313
using OnixData.Version3;
1414
using OnixData.Version3.Header;
15+
using OnixData.Version3.Text;
1516

1617
namespace OnixData
1718
{
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
namespace OnixData.Services
2+
{
3+
/// <summary>
/// Plain settings holder for the chat-based text repair service. Every property is
/// freely settable; the values assigned below are the defaults a new instance starts with.
/// </summary>
public class OnixChatServiceSettings
{
    /// <summary>API key used to build the service credential. No default (null).</summary>
    public string ApiKey { get; set; }

    /// <summary>Name of the chat model to use. Defaults to "gpt-4o-mini".</summary>
    public string Model { get; set; } = "gpt-4o-mini";

    /// <summary>
    /// Optional custom prompt. When null or empty the repair service uses its built-in
    /// HTML repair prompt. The service passes this to String.Format with the text to
    /// repair as argument {0}. No default (null).
    /// </summary>
    public string Prompt { get; set; }

    /// <summary>Optional suffix value. Defaults to null.</summary>
    public string Suffix { get; set; } = null;

    /// <summary>Presumably the sampling temperature for requests (TODO confirm usage). Defaults to 0.3.</summary>
    public decimal Temperature { get; set; } = 0.3M;

    /// <summary>Presumably the maximum number of tokens for a response (TODO confirm usage). Defaults to 2000.</summary>
    public int MaxTokens { get; set; } = 2000;

    /// <summary>Presumably the nucleus-sampling (top-p) value (TODO confirm usage). Defaults to 1.0.</summary>
    public decimal TopP { get; set; } = 1.0m;

    /// <summary>Presumably a stop sequence for the completion (TODO confirm usage). Defaults to "[END]".</summary>
    public string Stop { get; set; } = "[END]";

    /// <summary>Network timeout, in seconds, for standard-sized requests. Defaults to 180.</summary>
    public int TimeoutInSeconds { get; set; } = 180;
}
27+
}
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
using System;
2+
3+
using OpenAI.Chat;
4+
5+
namespace OnixData.Services
6+
{
7+
/// <summary>
/// Sends text to an OpenAI chat model with a "repair this HTML" prompt and returns the
/// model's answer, unwrapped from any ```html code fence.
/// </summary>
/// <remarks>
/// Of the values in <see cref="OnixChatServiceSettings"/>, only ApiKey, Model, Prompt and
/// TimeoutInSeconds are read by this class; Temperature, MaxTokens, TopP, Stop and Suffix
/// are not forwarded to the request.
/// </remarks>
public class OnixTextRepairChatService
{
    /// <summary>Input length (in characters) at or above which the long-timeout client is used.</summary>
    public const int CONST_LARGE_REQUEST_SIZE_THRESHOLD = 25000;

    public const string RETURN_ERROR_MSG_PREFIX = @"ERROR! ";

    // NOTE: these prompts were previously verbatim (@"") strings, which sent a literal
    // backslash followed by 'n' instead of a line break before the text to repair.
    public const string CONST_REQUEST_TAG_FIX_MESSAGE_FORMAT =
        "Repair the following XML:\n{0}";

    public const string CONST_REQUEST_HTML_FIX_MESSAGE_FORMAT =
        "Repair the following HTML so that it is valid and all tags are paired, without any reordering or omitting text and without converting the text into a HTML document that starts with <!DOCTYPE html> and without adding more text than necessary:\n{0}";

    public const string CONST_GPT4_RESPONSE_PADDED_START = "\n";
    public const string CONST_GPT4_RESPONSE_HTML_START = "```html";
    public const string CONST_GPT4_RESPONSE_HTML_END = "```";

    private readonly OnixChatServiceSettings _settings;

    private readonly ChatClient _defaultOpenAIChatClient;
    private readonly ChatClient _largeRequestOpenAIChatClient;

    /// <summary>
    /// Creates the service and its two chat clients: one using the configured timeout and
    /// one with a fixed ten-minute timeout for large requests.
    /// </summary>
    /// <param name="settings">Supplies the API key, model name, optional prompt and timeout.</param>
    /// <exception cref="ArgumentNullException"><paramref name="settings"/> is null.</exception>
    public OnixTextRepairChatService(OnixChatServiceSettings settings)
    {
        if (settings == null)
        {
            throw new ArgumentNullException(nameof(settings));
        }

        _settings = settings;

        var keyCreds = new System.ClientModel.ApiKeyCredential(settings.ApiKey);

        var defaultOptions =
            new OpenAI.OpenAIClientOptions() { NetworkTimeout = new TimeSpan(0, 0, settings.TimeoutInSeconds) };

        _defaultOpenAIChatClient =
            new ChatClient(settings.Model, keyCreds, defaultOptions);

        // Large inputs get a longer, fixed ten-minute network timeout.
        var largeRequestOptions =
            new OpenAI.OpenAIClientOptions() { NetworkTimeout = new TimeSpan(0, 10, 0) };

        _largeRequestOpenAIChatClient =
            new ChatClient(settings.Model, keyCreds, largeRequestOptions);
    }

    /// <summary>
    /// Asks the chat model to repair the supplied text and returns the repaired version.
    /// </summary>
    /// <param name="commTextTag">
    /// The text to repair. It is inserted as argument {0} into the configured prompt (or
    /// the built-in HTML repair prompt when none is configured) via String.Format.
    /// </param>
    /// <param name="requestLength">
    /// Set to the time spent in the chat request, or <see cref="TimeSpan.Zero"/> when no
    /// request was made. Left untouched if the request throws.
    /// </param>
    /// <returns>
    /// The content of the first ```html fence in the response when a closed fence is
    /// present, otherwise the raw response text, with one leading newline removed. The
    /// input is returned when it is null/empty or when the model returns no text.
    /// </returns>
    public string RetrieveRepairedCommHtml(string commTextTag, ref TimeSpan requestLength)
    {
        // Nothing to repair: avoid a pointless (and billable) request and a null dereference.
        if (String.IsNullOrEmpty(commTextTag))
        {
            requestLength = TimeSpan.Zero;
            return commTextTag;
        }

        string promptFormat = String.IsNullOrEmpty(_settings.Prompt)
            ? CONST_REQUEST_HTML_FIX_MESSAGE_FORMAT
            : _settings.Prompt;

        string promptRequest = String.Format(promptFormat, commTextTag);

        ChatClient client = commTextTag.Length < CONST_LARGE_REQUEST_SIZE_THRESHOLD
            ? _defaultOpenAIChatClient
            : _largeRequestOpenAIChatClient;

        // Stopwatch is monotonic; DateTime.Now differences can be skewed by clock adjustments.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        ChatCompletion completion = client.CompleteChat(promptRequest);

        stopwatch.Stop();
        requestLength = stopwatch.Elapsed;

        string responseText = null;
        if ((completion != null) && (completion.Content.Count > 0))
        {
            responseText = completion.Content[0].Text;
        }

        // A missing or empty answer must not wipe out the caller's text (and previously
        // caused a NullReferenceException further down), so fall back to the input.
        string repairedCommTextTag = String.IsNullOrEmpty(responseText)
            ? commTextTag
            : ExtractFencedHtml(responseText);

        if (repairedCommTextTag.StartsWith(CONST_GPT4_RESPONSE_PADDED_START, StringComparison.Ordinal))
        {
            repairedCommTextTag = repairedCommTextTag.Remove(0, CONST_GPT4_RESPONSE_PADDED_START.Length);
        }

        return repairedCommTextTag;
    }

    /// <summary>
    /// Returns the trimmed content of the first ```html ... ``` fence in the response, or
    /// the response unchanged when there is no opening fence, no closing fence, or the
    /// fence is empty.
    /// </summary>
    private static string ExtractFencedHtml(string responseText)
    {
        int fenceStart = responseText.IndexOf(CONST_GPT4_RESPONSE_HTML_START, StringComparison.Ordinal);
        if (fenceStart < 0)
        {
            return responseText;
        }

        int htmlStart = fenceStart + CONST_GPT4_RESPONSE_HTML_START.Length;
        int htmlEnd = responseText.IndexOf(CONST_GPT4_RESPONSE_HTML_END, htmlStart, StringComparison.Ordinal);

        if (htmlEnd > htmlStart)
        {
            return responseText.Substring(htmlStart, htmlEnd - htmlStart).Trim();
        }

        return responseText;
    }
}
89+
}

OnixData/Version3/Text/OnixTextContent.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
using System.Text;
55
using System.Threading.Tasks;
66

7-
namespace OnixData.Version3
7+
namespace OnixData.Version3.Text
88
{
99
/// <remarks/>
1010
[System.Xml.Serialization.XmlTypeAttribute(AnonymousType = true)]

OnixData/app.config

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<configuration>
3+
<runtime>
4+
<assemblyBinding xmlns="urn:schemas-microsoft-com:asm.v1">
5+
</assemblyBinding>
6+
</runtime>
7+
</configuration>

OnixData/packages.config

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<packages>
3+
<package id="Microsoft.Bcl.AsyncInterfaces" version="8.0.0" targetFramework="net462" />
4+
<package id="Microsoft.Extensions.DependencyInjection.Abstractions" version="8.0.2" targetFramework="net462" />
5+
<package id="Microsoft.Extensions.Logging.Abstractions" version="8.0.3" targetFramework="net462" />
6+
<package id="OpenAI" version="2.5.0" targetFramework="net462" />
7+
<package id="System.Buffers" version="4.5.1" targetFramework="net462" />
8+
<package id="System.ClientModel" version="1.6.1" targetFramework="net462" />
9+
<package id="System.Diagnostics.DiagnosticSource" version="8.0.1" targetFramework="net462" />
10+
<package id="System.Memory" version="4.5.5" targetFramework="net462" />
11+
<package id="System.Memory.Data" version="8.0.1" targetFramework="net462" />
12+
<package id="System.Numerics.Vectors" version="4.5.0" targetFramework="net462" />
13+
<package id="System.Runtime.CompilerServices.Unsafe" version="6.0.0" targetFramework="net462" />
14+
<package id="System.Text.Encodings.Web" version="8.0.0" targetFramework="net462" />
15+
<package id="System.Text.Json" version="8.0.6" targetFramework="net462" />
16+
<package id="System.Threading.Tasks.Extensions" version="4.5.4" targetFramework="net462" />
17+
<package id="System.ValueTuple" version="4.5.0" targetFramework="net462" />
18+
</packages>

0 commit comments

Comments
 (0)