Skip to content

Commit 57c6a3e

Browse files
Added the samples for remaining data extraction
1 parent 64b181b commit 57c6a3e

File tree

35 files changed

+446
-0
lines changed

35 files changed

+446
-0
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<Solution>
2+
<Project Path="Apply-Confidence-Threshold-to-Extract-data/Apply-Confidence-Threshold-to-Extract-data.csproj" />
3+
</Solution>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
<!-- Console sample project: builds an executable for net10.0 with implicit usings and nullable reference types enabled. -->
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net10.0</TargetFramework>
6+
<!-- NOTE(review): Program.cs declares namespace ApplyConfidenceThresholdToExtractData, which differs from this root namespace; confirm which spelling is intended. -->
<RootNamespace>Apply_Confidence_Threshold_to_Extract_data</RootNamespace>
7+
<ImplicitUsings>enable</ImplicitUsings>
8+
<Nullable>enable</Nullable>
9+
</PropertyGroup>
10+
11+
<ItemGroup>
12+
<!-- NOTE(review): Version="*" is a floating version, so the package release is not pinned; confirm this is intended for the sample. -->
<PackageReference Include="Syncfusion.SmartDataExtractor.Net.Core" Version="*" />
13+
</ItemGroup>
14+
15+
<ItemGroup>
16+
<!-- Copy the sample input PDF to the build output directory on every build. -->
<None Update="Data\Input.pdf">
17+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
18+
</None>
19+
<!-- Copy the Output folder placeholder file to the build output directory on every build. -->
<None Update="Output\.gitkeep">
20+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
21+
</None>
22+
</ItemGroup>
23+
24+
</Project>

Data-Extraction/Smart-Data-Extractor/Apply-Confidence-threshold/.NET/Apply-Confidence-Threshold-to-Extract-data/Output/.gitkeep

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
using System.IO;
2+
using Syncfusion.Pdf.Parsing;
3+
using Syncfusion.SmartDataExtractor;
4+
5+
namespace ApplyConfidenceThresholdToExtractData
{
    /// <summary>
    /// Sample that extracts data from a PDF with the Smart Data Extractor while
    /// applying a custom confidence threshold, then saves the result as a new PDF.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads <c>Data/Input.pdf</c>, extracts its data with a confidence threshold of 0.75,
        /// and writes the result to <c>Output/Output.pdf</c>. Both paths are relative to the
        /// current working directory.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Build the paths with Path.Combine instead of hard-coding '\', which is not a
            // directory separator on Linux or macOS. On Windows the resulting paths are unchanged.
            string inputPath = Path.Combine("Data", "Input.pdf");
            string outputDirectory = "Output";
            string outputPath = Path.Combine(outputDirectory, "Output.pdf");

            // Make sure the destination folder exists; this is a no-op when it is already there.
            Directory.CreateDirectory(outputDirectory);

            // Load the input PDF file.
            using (FileStream stream = new FileStream(inputPath, FileMode.Open, FileAccess.Read))
            {
                // Initialize the Smart Data Extractor.
                DataExtractor extractor = new DataExtractor();

                // Apply confidence threshold to extract the data.
                // Only elements with confidence >= 0.75 will be included in the results.
                // Default confidence threshold value is 0.6.
                extractor.ConfidenceThreshold = 0.75;

                // Extract data and return as a loaded PDF document.
                PdfLoadedDocument pdf = extractor.ExtractDataAsPdfDocument(stream);
                try
                {
                    // Save the extracted output as a new PDF file.
                    pdf.Save(outputPath);
                }
                finally
                {
                    // Close the document to release resources, even when saving fails.
                    pdf.Close(true);
                }
            }
        }
    }
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<Solution>
2+
<Project Path="Different-form-recognizer-options/Different-form-recognizer-options.csproj" />
3+
</Solution>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
<!-- Console sample project: builds an executable for net10.0 with implicit usings and nullable reference types enabled. -->
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net10.0</TargetFramework>
6+
<!-- NOTE(review): Program.cs declares namespace DifferentFormRecognizerOptions, which differs from this root namespace; confirm which spelling is intended. -->
<RootNamespace>Different_form_recognizer_options</RootNamespace>
7+
<ImplicitUsings>enable</ImplicitUsings>
8+
<Nullable>enable</Nullable>
9+
</PropertyGroup>
10+
11+
<ItemGroup>
12+
<!-- NOTE(review): Version="*" is a floating version, so the package release is not pinned; confirm this is intended for the sample. -->
<PackageReference Include="Syncfusion.SmartDataExtractor.Net.Core" Version="*" />
13+
</ItemGroup>
14+
15+
<ItemGroup>
16+
<!-- Copy the sample input PDF to the build output directory on every build. -->
<None Update="Data\Input.pdf">
17+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
18+
</None>
19+
<!-- Copy the Output folder placeholder file to the build output directory on every build. -->
<None Update="Output\.gitkeep">
20+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
21+
</None>
22+
</ItemGroup>
23+
24+
</Project>

Data-Extraction/Smart-Data-Extractor/Different-form-recognizer-options/.NET/Different-form-recognizer-options/Output/.gitkeep

Whitespace-only changes.
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
using System.IO;
2+
using Syncfusion.Pdf.Parsing;
3+
using Syncfusion.SmartDataExtractor;
4+
using Syncfusion.SmartFormRecognizer;
5+
6+
namespace DifferentFormRecognizerOptions
{
    /// <summary>
    /// Sample that runs the Smart Data Extractor with form detection enabled and a
    /// fully configured set of form recognition options, then saves the result as a new PDF.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads <c>Data/Input.pdf</c>, extracts form data using the configured
        /// <see cref="FormRecognizeOptions"/>, and writes the result to <c>Output/Output.pdf</c>.
        /// Both paths are relative to the current working directory.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Build the paths with Path.Combine instead of hard-coding '\', which is not a
            // directory separator on Linux or macOS. On Windows the resulting paths are unchanged.
            string inputPath = Path.Combine("Data", "Input.pdf");
            string outputDirectory = "Output";
            string outputPath = Path.Combine(outputDirectory, "Output.pdf");

            // Make sure the destination folder exists; this is a no-op when it is already there.
            Directory.CreateDirectory(outputDirectory);

            // Open the input PDF file as a stream.
            using (FileStream stream = new FileStream(inputPath, FileMode.Open, FileAccess.Read))
            {
                // Initialize the Smart Data Extractor.
                DataExtractor extractor = new DataExtractor();

                // Enable form detection in the document to identify form fields.
                extractor.EnableFormDetection = true;

                // Configure form recognition options for advanced detection.
                FormRecognizeOptions formOptions = new FormRecognizeOptions();

                // Recognize forms across pages 1 to 5 in the document.
                formOptions.PageRange = new int[,] { { 1, 5 } };

                // Set confidence threshold for form recognition to filter results.
                formOptions.ConfidenceThreshold = 0.6;

                // Enable detection of signatures within the document.
                formOptions.DetectSignatures = true;

                // Enable detection of textboxes within the document.
                formOptions.DetectTextboxes = true;

                // Enable detection of checkboxes within the document.
                formOptions.DetectCheckboxes = true;

                // Enable detection of radio buttons within the document.
                formOptions.DetectRadioButtons = true;

                // Assign the configured form recognition options to the extractor.
                extractor.FormRecognizeOptions = formOptions;

                // Extract form data and return as a loaded PDF document.
                PdfLoadedDocument pdf = extractor.ExtractDataAsPdfDocument(stream);
                try
                {
                    // Save the extracted output as a new PDF file.
                    pdf.Save(outputPath);
                }
                finally
                {
                    // Close the document to release resources, even when saving fails.
                    pdf.Close(true);
                }
            }
        }
    }
}

0 commit comments

Comments
 (0)