Skip to content

Commit 1eedefb

Browse files
author
alexander.kischuk
committed
Non-Ascii characters support for the output file
PDFOC-91
1 parent fdd155c commit 1eedefb

File tree

2 files changed

+23
-0
lines changed

2 files changed

+23
-0
lines changed

pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/IntegrationTestHelper.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ public class IntegrationTestHelper extends ExtendedITextTest {
7373
// directory with test files
7474
public static final String TEST_DIRECTORY = "./src/test/resources/com/itextpdf/pdfocr/";
7575
private static final String TARGET_FOLDER = "./target/test/resources/com/itextpdf/pdfocr/";
76+
// target directory whose path contains non-ASCII characters
private static final String NON_ASCII_TARGET_DIRECTORY = "./target/test/resources/com/itextpdf/ñoñ-ascîî/";
7677

7778
// directory with trained data for tests
7879
protected static final String LANG_TESS_DATA_DIRECTORY = TEST_DIRECTORY + "tessdata";
@@ -159,6 +160,16 @@ public static String getTargetDirectory() {
159160
return TARGET_FOLDER;
160161
}
161162

163+
/**
164+
* Returns a non ascii target directory.
165+
*/
166+
public static String getNonAsciiTargetDirectory() {
167+
if (!Files.exists(java.nio.file.Paths.get(NON_ASCII_TARGET_DIRECTORY))) {
168+
createDestinationFolder(NON_ASCII_TARGET_DIRECTORY);
169+
}
170+
return NON_ASCII_TARGET_DIRECTORY;
171+
}
172+
162173
protected static File getTessDataDirectory() {
163174
return new File(LANG_TESS_DATA_DIRECTORY);
164175
}

pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tessdata/TessDataIntegrationTest.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,18 @@ public void testJapaneseScript() {
623623
Assert.assertEquals(expected, result);
624624
}
625625

626+
@Test
627+
public void testTargetDirectoryWithNonAsciiPath() {
628+
String imgPath = TEST_IMAGES_DIRECTORY + "german_01.jpg";
629+
String expectedTxt = TEST_DOCUMENTS_DIRECTORY + "german_01" + testFileTypeName + ".txt";
630+
List<String> languages = Collections.<String>singletonList("deu");
631+
String resultTxtFile = getNonAsciiTargetDirectory() + getImageName(imgPath, languages) + ".txt";
632+
doOcrAndSaveToTextFile(tesseractReader, imgPath, resultTxtFile, languages);
633+
634+
boolean result = compareTxtFiles(expectedTxt, resultTxtFile);
635+
Assert.assertTrue(result);
636+
}
637+
626638
/**
627639
* Do OCR for given image and compare result text file with expected one.
628640
*/

0 commit comments

Comments
 (0)