Skip to content

Commit 0d0ac3a

Browse files
authored
Update EBCDIC support to support testing on normal ASCII systems (#656)
The pcre2test utility needs quite a few changes to accommodate this. It is simpler to add a new mode to it than to make it fully EBCDIC-native. On an ASCII system, pcre2test performs ASCII I/O, but translates the input when passing it to the fully-EBCDIC-supporting library.
1 parent ce6e960 commit 0d0ac3a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+3478
-1577
lines changed

CMakeLists.txt

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,10 @@ set(
272272

273273
set(PCRE2_EBCDIC_NL25 OFF CACHE BOOL "Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.")
274274

275+
set(PCRE2_EBCDIC_IGNORING_COMPILER OFF CACHE BOOL "Force EBCDIC 1047 using numeric literals rather than C character literals; implies EBCDIC.")
276+
277+
option(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
278+
275279
set(
276280
PCRE2_LINK_SIZE
277281
"2"
@@ -579,13 +583,42 @@ if(NEWLINE_DEFAULT STREQUAL "")
579583
)
580584
endif()
581585

586+
set(REBUILD_CHARTABLES OFF)
587+
if(PCRE2_REBUILD_CHARTABLES)
588+
set(REBUILD_CHARTABLES ON)
589+
endif()
590+
591+
set(EBCDIC OFF)
582592
if(PCRE2_EBCDIC)
583-
set(EBCDIC 1)
593+
set(EBCDIC ON)
584594
endif()
585595

586596
if(PCRE2_EBCDIC_NL25)
587-
set(EBCDIC 1)
588-
set(EBCDIC_NL25 1)
597+
set(EBCDIC ON)
598+
set(EBCDIC_NL25 ON)
599+
endif()
600+
601+
if(PCRE2_EBCDIC_IGNORING_COMPILER)
602+
set(EBCDIC ON)
603+
set(EBCDIC_IGNORING_COMPILER ON)
604+
endif()
605+
606+
# Make sure that if EBCDIC is set (without EBCDIC_IGNORING_COMPILER), then
607+
# REBUILD_CHARTABLES is also enabled.
608+
# Also check that UTF support is not requested, because PCRE2 cannot handle
609+
# EBCDIC and UTF in the same build. To do so it would need to use different
610+
# character constants depending on the mode.
611+
# Also, EBCDIC cannot be used with 16-bit and 32-bit libraries.
612+
if(EBCDIC)
613+
if(NOT EBCDIC_IGNORING_COMPILER)
614+
set(REBUILD_CHARTABLES ON)
615+
endif()
616+
if(PCRE2_SUPPORT_UNICODE)
617+
message(FATAL_ERROR "Support for EBCDIC and Unicode cannot be enabled at the same time")
618+
endif()
619+
if(PCRE2_BUILD_PCRE2_16 OR PCRE2_BUILD_PCRE2_32)
620+
message(FATAL_ERROR "EBCDIC support is available only for the 8-bit library")
621+
endif()
589622
endif()
590623

591624
# Output files
@@ -659,8 +692,7 @@ endif()
659692

660693
# Character table generation
661694

662-
option(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
663-
if(PCRE2_REBUILD_CHARTABLES)
695+
if(REBUILD_CHARTABLES)
664696
add_executable(pcre2_dftables src/pcre2_dftables.c)
665697
add_custom_command(
666698
OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c
@@ -670,8 +702,12 @@ if(PCRE2_REBUILD_CHARTABLES)
670702
COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
671703
VERBATIM
672704
)
673-
else()
705+
elseif(NOT PCRE2_EBCDIC)
674706
configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.dist ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
707+
elseif(PCRE2_EBCDIC_NL25)
708+
configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.ebcdic-1047-nl25 ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
709+
else()
710+
configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.ebcdic-1047-nl15 ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
675711
endif()
676712

677713
# Source code
@@ -1345,9 +1381,19 @@ if(PCRE2_SHOW_REPORT)
13451381
message(STATUS " Newline char/sequence ............. : ${PCRE2_NEWLINE}")
13461382
message(STATUS " \\R matches only ANYCRLF ........... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
13471383
message(STATUS " \\C is disabled .................... : ${PCRE2_NEVER_BACKSLASH_C}")
1348-
message(STATUS " EBCDIC coding ..................... : ${PCRE2_EBCDIC}")
1349-
message(STATUS " EBCDIC coding with NL=0x25 ........ : ${PCRE2_EBCDIC_NL25}")
1350-
message(STATUS " Rebuild char tables ............... : ${PCRE2_REBUILD_CHARTABLES}")
1384+
1385+
if(NOT EBCDIC)
1386+
set(EBCDIC_NL_CODE "n/a")
1387+
elseif(EBCDIC_NL25)
1388+
set(EBCDIC_NL_CODE "0x25")
1389+
else()
1390+
set(EBCDIC_NL_CODE "0x15")
1391+
endif()
1392+
message(STATUS " EBCDIC coding ..................... : ${EBCDIC}")
1393+
message(STATUS " EBCDIC code for NL ................ : ${EBCDIC_NL_CODE}")
1394+
message(STATUS " EBCDIC coding ignoring compiler ... : ${PCRE2_EBCDIC_IGNORING_COMPILER}")
1395+
message(STATUS " Rebuild char tables ............... : ${REBUILD_CHARTABLES}")
1396+
13511397
message(STATUS " Internal link size ................ : ${PCRE2_LINK_SIZE}")
13521398
message(STATUS " Maximum variable lookbehind ....... : ${PCRE2_MAX_VARLOOKBEHIND}")
13531399
message(STATUS " Parentheses nest limit ............ : ${PCRE2_PARENS_NEST_LIMIT}")

Makefile.am

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -362,9 +362,21 @@ src/pcre2_chartables.c: pcre2_dftables$(EXEEXT)
362362
rm -f $@
363363
./pcre2_dftables$(EXEEXT) $@
364364
else
365+
if WITH_EBCDIC
366+
if WITH_EBCDIC_NL25
367+
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl25
368+
rm -f $@
369+
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl25 $(abs_builddir)/src/pcre2_chartables.c
370+
else # WITH_EBCDIC_NL25
371+
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl15
372+
rm -f $@
373+
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl15 $(abs_builddir)/src/pcre2_chartables.c
374+
endif # WITH_EBCDIC_NL25
375+
else # WITH_EBCDIC
365376
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist
366377
rm -f $@
367378
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.dist $(abs_builddir)/src/pcre2_chartables.c
379+
endif # WITH_EBCDIC
368380
endif # WITH_REBUILD_CHARTABLES
369381

370382
BUILT_SOURCES = src/pcre2_chartables.c
@@ -460,7 +472,10 @@ endif # WITH_PCRE2_32
460472
# The pcre2_chartables.c.dist file is the default version of
461473
# pcre2_chartables.c, used unless --enable-rebuild-chartables is specified.
462474

463-
EXTRA_DIST += src/pcre2_chartables.c.dist
475+
EXTRA_DIST += \
476+
src/pcre2_chartables.c.dist \
477+
src/pcre2_chartables.c.ebcdic-1047-nl15 \
478+
src/pcre2_chartables.c.ebcdic-1047-nl25
464479
CLEANFILES += src/pcre2_chartables.c
465480

466481
# The JIT compiler lives in a separate directory, but its files are #included
@@ -768,7 +783,8 @@ EXTRA_DIST += \
768783
testdata/testinput25 \
769784
testdata/testinput26 \
770785
testdata/testinput27 \
771-
testdata/testinputEBC \
786+
testdata/testinput28 \
787+
testdata/testinput29 \
772788
testdata/testinputheap \
773789
testdata/testoutput1 \
774790
testdata/testoutput2 \
@@ -810,7 +826,8 @@ EXTRA_DIST += \
810826
testdata/testoutput25 \
811827
testdata/testoutput26 \
812828
testdata/testoutput27 \
813-
testdata/testoutputEBC \
829+
testdata/testoutput28 \
830+
testdata/testoutput29 \
814831
testdata/testoutputheap-16 \
815832
testdata/testoutputheap-32 \
816833
testdata/testoutputheap-8 \

README

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -309,11 +309,22 @@ library. They are also documented in the pcre2build man page.
309309

310310
--enable-ebcdic --disable-unicode
311311

312-
This automatically implies --enable-rebuild-chartables (see above). However,
313-
when PCRE2 is built this way, it always operates in EBCDIC. It cannot support
314-
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
315-
which specifies that the code value for the EBCDIC NL character is 0x25
316-
instead of the default 0x15.
312+
This automatically implies --enable-rebuild-chartables (see above), in order
313+
to ensure that you have the correct default character tables for your system's
314+
codepage. There is an exception when you set --enable-ebcdic-ignoring-compiler
315+
(see below), which allows using a default set of EBCDIC 1047 character tables
316+
rather than forcing use of --enable-rebuild-chartables.
317+
318+
When PCRE2 is built with EBCDIC support, it always operates in EBCDIC. It
319+
cannot support both EBCDIC and ASCII or UTF-8/16/32.
320+
321+
There is a second option, --enable-ebcdic-nl25, which specifies that the code
322+
value for the EBCDIC NL character is 0x25 instead of the default 0x15.
323+
324+
There is a third option, --enable-ebcdic-ignoring-compiler, which disregards
325+
the compiler's codepage for determining the numeric value of C character
326+
constants such as 'z', and instead forces PCRE2 to use numeric constants for
327+
the EBCDIC 1047 codepage.
317328

318329
. If you specify --enable-debug, additional debugging code is included in the
319330
build. This option is intended for use by the PCRE2 maintainers.
@@ -744,8 +755,16 @@ and with UTF support, respectively. Test 23 tests \C when it is locked out.
744755
Tests 24 and 25 test the experimental pattern conversion functions, without and
745756
with UTF support, respectively.
746757

747-
Test 26 checks Unicode property support using tests that are generated
748-
automatically from the Unicode data tables.
758+
Test 26 checks Unicode property support using tests that were generated
759+
automatically from the Unicode data tables. This is the archived version of
760+
the tests from Unicode 15.
761+
762+
Test 27 checks Unicode property support using tests that are generated
763+
automatically from the currently-used Unicode data tables.
764+
765+
Test 28 tests EBCDIC support, and is only run when PCRE2 is specifically
766+
compiled for EBCDIC. Test 29 tests EBCDIC when NL has been configured to be
767+
0x25.
749768

750769

751770
Character tables
@@ -822,6 +841,10 @@ The distribution should contain the files listed below.
822841
src/pcre2_chartables.c.dist a default set of character tables that assume
823842
ASCII coding; unless --enable-rebuild-chartables is
824843
specified, used by copying to pcre2_chartables.c
844+
src/pcre2_chartables.c.ebcdic-1047-{nl15,nl25} a default set of character
845+
tables for EBCDIC 1047; used if
846+
--enable-ebcdic-ignoring-compiler is specified
847+
without --enable-rebuild-chartables
825848

826849
src/pcre2posix.c )
827850
src/pcre2_auto_possess.c )

0 commit comments

Comments
 (0)