-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathProgram.cs
53 lines (48 loc) · 1.98 KB
/
Program.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
using GhostscriptSharp;
using iText.IO.Codec;
using iText.Kernel.Pdf;
using System;
using System.IO;
using Tesseract;
namespace hi
{
    /// <summary>
    /// Console entry point that rasterizes each page of a PDF with Ghostscript,
    /// runs Tesseract OCR on every page image, and feeds the results to a
    /// Tesseract PDF result renderer.
    /// </summary>
    public static class Program
    {
        /// <summary>
        /// Runs the page-by-page OCR pipeline using the hard-coded paths below.
        /// </summary>
        public static void Main()
        {
            // Input file path.
            string inputPath = @"C:\Users\HP\Desktop\input.pdf";
            // Output file path handed to the PDF renderer.
            string outputPath = @"C:\Users\HP\Desktop\output";
            // Tessdata folder (also passed to the renderer as its font directory).
            string trainingData = @"C:\Users\HP\Desktop\tessdata";

            int pageCount = GetPageCount(inputPath);

            // NOTE(review): pdf.ttf is a font file, yet it is passed to the engine
            // as a configuration file. Kept as in the original; confirm intent.
            string configFile = Path.Combine(trainingData, "pdf.ttf");

            using (IResultRenderer renderer = Tesseract.PdfResultRenderer.CreatePdfRenderer(outputPath, trainingData, false))
            {
                using (renderer.BeginDocument("Serachablepdftest"))
                {
                    // Engine construction does not depend on the page, so build it
                    // once instead of once per page.
                    using (TesseractEngine engine = new TesseractEngine(trainingData, "eng", EngineMode.TesseractAndLstm, configFile))
                    {
                        for (int i = 1; i <= pageCount; i++)
                        {
                            // Page image is written relative to the current working directory.
                            string pageImage = "example" + i + ".jpg";

                            // Render only page i. The previous first/last range of
                            // (i, n) rasterized every remaining page into this one file.
                            GhostscriptWrapper.GeneratePageThumbs(inputPath, pageImage, i, i, 200, 200);

                            using (var img = Pix.LoadFromFile(pageImage))
                            {
                                // The page is disposed before the next iteration, so the
                                // shared engine only ever has one page in flight.
                                using (var page = engine.Process(img, "Serachablepdftest"))
                                {
                                    renderer.AddPage(page);
                                }
                            }

                            Console.WriteLine("Page " + i + " done\n");
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Opens the PDF at <paramref name="pdfPath"/> and returns its page count.
        /// </summary>
        /// <param name="pdfPath">Path of the PDF file to inspect.</param>
        /// <returns>The value reported by <c>PdfDocument.GetNumberOfPages()</c>.</returns>
        private static int GetPageCount(string pdfPath)
        {
            // Dispose the document (not just the reader) so it is closed even when
            // GetNumberOfPages throws.
            using (PdfDocument pdfDoc = new PdfDocument(new PdfReader(pdfPath)))
            {
                return pdfDoc.GetNumberOfPages();
            }
        }
    }
}