@@ -474,11 +474,16 @@ public static function split(
474
474
string $ pattern ,
475
475
bool |int $ captureOffset = false ,
476
476
bool $ skipEmpty = false ,
477
+ bool $ utf8Offset = false ,
477
478
): array {
478
479
$ flags = is_int ($ captureOffset ) // back compatibility
479
480
? $ captureOffset
480
481
: ($ captureOffset ? PREG_SPLIT_OFFSET_CAPTURE : 0 ) | ($ skipEmpty ? PREG_SPLIT_NO_EMPTY : 0 );
481
- return self ::pcre ('preg_split ' , [$ pattern , $ subject , -1 , $ flags | PREG_SPLIT_DELIM_CAPTURE ]);
482
+ $ m = self ::pcre ('preg_split ' , [$ pattern , $ subject , -1 , $ flags | PREG_SPLIT_DELIM_CAPTURE ]);
483
+ if ($ utf8Offset && ($ flags & PREG_SPLIT_OFFSET_CAPTURE )) {
484
+ return self ::bytesToChars ($ subject , [$ m ])[0 ];
485
+ }
486
+ return $ m ;
482
487
}
483
488
484
489
@@ -491,16 +496,24 @@ public static function match(
491
496
bool |int $ captureOffset = false ,
492
497
int $ offset = 0 ,
493
498
bool $ unmatchedAsNull = false ,
499
+ bool $ utf8Offset = false ,
494
500
): ?array {
495
501
$ flags = is_int ($ captureOffset ) // back compatibility
496
502
? $ captureOffset
497
503
: ($ captureOffset ? PREG_OFFSET_CAPTURE : 0 ) | ($ unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0 );
504
+ if ($ utf8Offset ) {
505
+ $ offset = strlen (self ::substring ($ subject , 0 , $ offset ));
506
+ }
498
507
if ($ offset > strlen ($ subject )) {
499
508
return null ;
500
509
}
501
- return self ::pcre ('preg_match ' , [$ pattern , $ subject , &$ m , $ flags , $ offset ])
502
- ? $ m
503
- : null ;
510
+ if (!self ::pcre ('preg_match ' , [$ pattern , $ subject , &$ m , $ flags , $ offset ])) {
511
+ return null ;
512
+ }
513
+ if ($ utf8Offset && ($ flags & PREG_OFFSET_CAPTURE )) {
514
+ return self ::bytesToChars ($ subject , [$ m ])[0 ];
515
+ }
516
+ return $ m ;
504
517
}
505
518
506
519
@@ -515,10 +528,14 @@ public static function matchAll(
515
528
int $ offset = 0 ,
516
529
bool $ unmatchedAsNull = false ,
517
530
bool $ patternOrder = false ,
531
+ bool $ utf8Offset = false ,
518
532
): array {
519
533
$ flags = is_int ($ captureOffset ) // back compatibility
520
534
? $ captureOffset
521
535
: ($ captureOffset ? PREG_OFFSET_CAPTURE : 0 ) | ($ unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0 ) | ($ patternOrder ? PREG_PATTERN_ORDER : 0 );
536
+ if ($ utf8Offset ) {
537
+ $ offset = strlen (self ::substring ($ subject , 0 , $ offset ));
538
+ }
522
539
if ($ offset > strlen ($ subject )) {
523
540
return [];
524
541
}
@@ -527,6 +544,9 @@ public static function matchAll(
527
544
($ flags & PREG_PATTERN_ORDER ) ? $ flags : ($ flags | PREG_SET_ORDER ),
528
545
$ offset ,
529
546
]);
547
+ if ($ utf8Offset && ($ flags & PREG_OFFSET_CAPTURE )) {
548
+ return self ::bytesToChars ($ subject , $ m );
549
+ }
530
550
return $ m ;
531
551
}
532
552
@@ -541,12 +561,16 @@ public static function replace(
541
561
int $ limit = -1 ,
542
562
bool $ captureOffset = false ,
543
563
bool $ unmatchedAsNull = false ,
564
+ bool $ utf8Offset = false ,
544
565
): string {
545
566
if (is_object ($ replacement ) || is_array ($ replacement )) {
546
567
if (!is_callable ($ replacement , false , $ textual )) {
547
568
throw new Nette \InvalidStateException ("Callback ' $ textual' is not callable. " );
548
569
}
549
570
$ flags = ($ captureOffset ? PREG_OFFSET_CAPTURE : 0 ) | ($ unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0 );
571
+ if ($ utf8Offset && $ captureOffset ) {
572
+ $ replacement = fn ($ m ) => $ replacement (self ::bytesToChars ($ subject , [$ m ])[0 ]);
573
+ }
550
574
return self ::pcre ('preg_replace_callback ' , [$ pattern , $ replacement , $ subject , $ limit , 0 , $ flags ]);
551
575
552
576
} elseif (is_array ($ pattern ) && is_string (key ($ pattern ))) {
@@ -558,6 +582,22 @@ public static function replace(
558
582
}
559
583
560
584
585
/**
 * Converts the byte offsets reported by PCRE (offset-capture mode) into character offsets.
 *
 * Each element of $groups is a list of [text, byteOffset] pairs; the same structure is
 * returned with every non-negative offset replaced by the number of characters that
 * precede it in $s, as counted by self::length().
 *
 * Negative offsets are left untouched: PCRE reports -1 for groups that did not
 * participate in the match, and that sentinel must stay recognisable to callers.
 *
 * @param  string  $s       subject string the offsets refer to
 * @param  array   $groups  list of match sets, each a list of [text, byteOffset] pairs
 * @return array   $groups with byte offsets rewritten as character offsets
 */
private static function bytesToChars(string $s, array $groups): array
{
    // Running cursor: $lastChars is the character offset that corresponds to byte offset $lastBytes.
    $lastBytes = $lastChars = 0;
    foreach ($groups as &$matches) {
        foreach ($matches as &$match) {
            $bytes = $match[1];
            if ($bytes < 0) {
                // unmatched group: keep the -1 sentinel instead of a stale character count
                continue;
            }

            if ($bytes > $lastBytes) {
                // moving forward: only the newly covered slice has to be measured
                $lastChars += self::length(substr($s, $lastBytes, $bytes - $lastBytes));
            } elseif ($bytes < $lastBytes) {
                // offsets are not guaranteed to be ascending (pattern-order results,
                // groups captured inside lookarounds), so recount from the start
                $lastChars = self::length(substr($s, 0, $bytes));
            }

            $lastBytes = $bytes;
            $match[1] = $lastChars;
        }
        unset($match); // drop the dangling reference before the next set is iterated
    }
    unset($matches);

    return $groups;
}
599
+
600
+
561
601
/** @internal */
562
602
public static function pcre (string $ func , array $ args )
563
603
{
0 commit comments