Skip to content

Commit cef4df5

Browse files
committed
951720 Added sample code for line breaks in OCR.
1 parent 74361f3 commit cef4df5

5 files changed

Lines changed: 69 additions & 0 deletions

File tree

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
2+
Microsoft Visual Studio Solution File, Format Version 12.00
3+
# Visual Studio Version 17
4+
VisualStudioVersion = 17.12.35707.178 d17.12
5+
MinimumVisualStudioVersion = 10.0.40219.1
6+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Adding-Line-Breaks-Using-OCR", "Adding-Line-Breaks-Using-OCR\Adding-Line-Breaks-Using-OCR.csproj", "{9B5BD78C-C908-4195-B981-EFB2EF032039}"
7+
EndProject
8+
Global
9+
GlobalSection(SolutionConfigurationPlatforms) = preSolution
10+
Debug|Any CPU = Debug|Any CPU
11+
Release|Any CPU = Release|Any CPU
12+
EndGlobalSection
13+
GlobalSection(ProjectConfigurationPlatforms) = postSolution
14+
{9B5BD78C-C908-4195-B981-EFB2EF032039}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
15+
{9B5BD78C-C908-4195-B981-EFB2EF032039}.Debug|Any CPU.Build.0 = Debug|Any CPU
16+
{9B5BD78C-C908-4195-B981-EFB2EF032039}.Release|Any CPU.ActiveCfg = Release|Any CPU
17+
{9B5BD78C-C908-4195-B981-EFB2EF032039}.Release|Any CPU.Build.0 = Release|Any CPU
18+
EndGlobalSection
19+
GlobalSection(SolutionProperties) = preSolution
20+
HideSolutionNode = FALSE
21+
EndGlobalSection
22+
EndGlobal
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net8.0</TargetFramework>
6+
<RootNamespace>Adding_Line_Breaks_Using_OCR</RootNamespace>
7+
<ImplicitUsings>enable</ImplicitUsings>
8+
<Nullable>enable</Nullable>
9+
</PropertyGroup>
10+
11+
<ItemGroup>
12+
<PackageReference Include="Syncfusion.PDF.OCR.NET" Version="*" />
13+
</ItemGroup>
14+
15+
</Project>
Binary file not shown.

OCR/.NET/Adding-Line-Breaks-Using-OCR/.NET/Adding-Line-Breaks-Using-OCR/Output/gitkeep.txt

Whitespace-only changes.
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
using Syncfusion.OCRProcessor;
2+
using Syncfusion.Pdf.Parsing;
3+
4+
// Sample: run OCR over an existing PDF, save the processed PDF, and write the
// recognized text to a text file with one recognized line per output line.

// Initialize the OCR processor
using (OCRProcessor processor = new OCRProcessor())
{
    // Load the existing PDF document.
    // FileAccess.Read is explicit because FileMode.Open on its own requests
    // read/write access, which fails when the input file is read-only.
    using (FileStream stream = new FileStream(Path.GetFullPath(@"Data/Input.pdf"), FileMode.Open, FileAccess.Read))
    {
        PdfLoadedDocument pdfLoadedDocument = new PdfLoadedDocument(stream);
        try
        {
            // Set OCR language to process
            processor.Settings.Language = Languages.English;

            // Page segmentation mode used by this sample
            processor.Settings.PageSegment = PageSegMode.SparseTextOsd;

            // Process OCR by providing the PDF document; the layout result carries the recognized lines
            processor.PerformOCR(pdfLoadedDocument, processor.TessDataPath, out OCRLayoutResult layoutResult);

            // Collect the text of every recognized line on every page (not only the first page),
            // so multi-page documents are not truncated and a result without pages yields empty text.
            List<string> recognizedLines = new List<string>();
            foreach (var page in layoutResult.Pages)
            {
                recognizedLines.AddRange(page.Lines.Select(line => line.Text));
            }

            // Join the lines with explicit line breaks
            string ocrText = string.Join("\n", recognizedLines);

            // Make sure the output folder exists before writing into it (no-op when it already does)
            Directory.CreateDirectory(Path.GetFullPath(@"Output"));

            // Create file stream.
            using (FileStream outputFileStream = new FileStream(Path.GetFullPath(@"Output/Output.pdf"), FileMode.Create, FileAccess.ReadWrite))
            {
                // Save the PDF document to file stream.
                pdfLoadedDocument.Save(outputFileStream);
            }

            // Write the recognized text, one recognized line per text line
            File.WriteAllText(Path.GetFullPath(@"Output/Output.txt"), ocrText);
        }
        finally
        {
            // Close the document even when OCR or saving fails.
            pdfLoadedDocument.Close(true);
        }
    }
}

0 commit comments

Comments
 (0)