Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve preg_split() function ReturnType #3757

Open
wants to merge 33 commits into
base: 2.1.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
6f8c0c0
feat improve preg_split type Extension
malsuke Dec 25, 2024
ca44a91
feat add test for varibles
malsuke Dec 25, 2024
9c33a2a
feat add benevolent type to preg_split
malsuke Dec 25, 2024
48c714d
feat new feat for flag
malsuke Dec 25, 2024
5a0b989
feat improve for flag & non-empty-string
malsuke Dec 25, 2024
05ac909
add test for PREG_SPLIT_DELIM_CAPTURE flag
malsuke Dec 25, 2024
97ed353
add test case for nonEmptySubject
malsuke Dec 26, 2024
a95ed66
feat add if state for nonEmptySubject
malsuke Dec 26, 2024
4031293
feat cleanup
malsuke Dec 26, 2024
0a01610
feat cleanup
malsuke Dec 26, 2024
043ed19
feat cleanup
malsuke Dec 26, 2024
68da760
feat add is_int assertion
malsuke Dec 26, 2024
db052cc
feat fix test
malsuke Dec 26, 2024
a647277
feat fix test
malsuke Dec 26, 2024
b9c303a
fix cleanup
malsuke Dec 26, 2024
319bcbb
fix cleanup
malsuke Dec 26, 2024
9c1a389
fix cleanup
malsuke Dec 26, 2024
8cb3030
fix cleanup loop
malsuke Dec 26, 2024
37f9b3e
fix __benevolent usage
malsuke Jan 14, 2025
cb5925b
fix test
malsuke Jan 14, 2025
541b024
fix test
malsuke Jan 14, 2025
ba25f6b
fix test
malsuke Jan 14, 2025
b4f4885
fix test
malsuke Jan 14, 2025
fb30cd7
fix coding style
malsuke Jan 14, 2025
660195b
fix: use utils function, return point, allow numeric-string
malsuke Mar 7, 2025
6487739
feat: add test for Error
malsuke Mar 7, 2025
03319d4
feat: migrate validation to private method
malsuke Mar 7, 2025
e4a07b0
fix: coding style
malsuke Mar 7, 2025
9388d23
fix: coding style
malsuke Mar 7, 2025
c206ccc
feat: change variable name, fix: check type of limit/flag
malsuke Mar 11, 2025
fce5dfd
add: add test for scaler value
malsuke Mar 11, 2025
79623a4
fix: lint
malsuke Mar 11, 2025
307cf54
feat: return union false
malsuke Mar 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion resources/functionMap.php
Original file line number Diff line number Diff line change
Expand Up @@ -9081,7 +9081,7 @@
'preg_replace' => ['string|array|null', 'regex'=>'string|array', 'replace'=>'string|array', 'subject'=>'string|array', 'limit='=>'int', '&w_count='=>'int'],
'preg_replace_callback' => ['string|array|null', 'regex'=>'string|array', 'callback'=>'callable(array<int|string, string>):string', 'subject'=>'string|array', 'limit='=>'int', '&w_count='=>'int'],
'preg_replace_callback_array' => ['string|array|null', 'pattern'=>'array<string,callable>', 'subject'=>'string|array', 'limit='=>'int', '&w_count='=>'int'],
'preg_split' => ['list<string>|false', 'pattern'=>'string', 'subject'=>'string', 'limit='=>'?int', 'flags='=>'int'],
'preg_split' => ['list<string>|list<array{string, int<0, max>}>|false', 'pattern'=>'string', 'subject'=>'string', 'limit='=>'?int', 'flags='=>'int'],
'prev' => ['mixed', '&rw_array_arg'=>'array|object'],
'print_r' => ['string|true', 'var'=>'mixed', 'return='=>'bool'],
'printf' => ['int', 'format'=>'string', '...values='=>'__stringAndStringable|int|float|null|bool'],
Expand Down
140 changes: 133 additions & 7 deletions src/Type/Php/PregSplitDynamicReturnTypeExtension.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,29 @@
use PHPStan\Reflection\FunctionReflection;
use PHPStan\TrinaryLogic;
use PHPStan\Type\Accessory\AccessoryArrayListType;
use PHPStan\Type\Accessory\AccessoryNonEmptyStringType;
use PHPStan\Type\Accessory\NonEmptyArrayType;
use PHPStan\Type\ArrayType;
use PHPStan\Type\BitwiseFlagHelper;
use PHPStan\Type\Constant\ConstantArrayType;
use PHPStan\Type\Constant\ConstantArrayTypeBuilder;
use PHPStan\Type\Constant\ConstantBooleanType;
use PHPStan\Type\Constant\ConstantIntegerType;
use PHPStan\Type\Constant\ConstantStringType;
use PHPStan\Type\DynamicFunctionReturnTypeExtension;
use PHPStan\Type\ErrorType;
use PHPStan\Type\IntegerRangeType;
use PHPStan\Type\IntegerType;
use PHPStan\Type\MixedType;
use PHPStan\Type\StringType;
use PHPStan\Type\Type;
use PHPStan\Type\TypeCombinator;
use PHPStan\Type\TypeUtils;
use function count;
use function is_array;
use function is_int;
use function preg_match;
use function preg_split;
use function strtolower;

final class PregSplitDynamicReturnTypeExtension implements DynamicFunctionReturnTypeExtension
Expand All @@ -36,17 +48,131 @@ public function isFunctionSupported(FunctionReflection $functionReflection): boo

public function getTypeFromFunctionCall(FunctionReflection $functionReflection, FuncCall $functionCall, Scope $scope): ?Type
{
$flagsArg = $functionCall->getArgs()[3] ?? null;
$args = $functionCall->getArgs();
if (count($args) < 2) {
return null;
}
$patternArg = $args[0];
$subjectArg = $args[1];
$limitArg = $args[2] ?? null;
$flagArg = $args[3] ?? null;
$patternType = $scope->getType($patternArg->value);
$patternConstantTypes = $patternType->getConstantStrings();
$subjectType = $scope->getType($subjectArg->value);
$subjectConstantTypes = $subjectType->getConstantStrings();

if (
count($patternConstantTypes) > 0
&& @preg_match($patternConstantTypes[0]->getValue(), '') === false
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we usually use Strings::match

Copy link
Contributor

@staabm staabm Mar 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this needs to check all patterns, not only the first

https://3v4l.org/495b1

) {
return new ErrorType();
}

$limits = [];
if ($limitArg === null) {
$limits = [-1];
} else {
$limitType = $scope->getType($limitArg->value);
foreach ($limitType->getConstantScalarValues() as $limit) {
if (!is_int($limit)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

numeric-string $limit is not an error

https://3v4l.org/JuFHj

return new ErrorType();
}
$limits[] = $limit;
}
}

$flags = [];
if ($flagArg === null) {
$flags = [0];
} else {
$flagType = $scope->getType($flagArg->value);
foreach ($flagType->getConstantScalarValues() as $flag) {
if (!is_int($flag)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to be consistent with limit, this might also allow numeric-string

https://3v4l.org/PqFaA

return new ErrorType();
}
$flags[] = $flag;
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By replacing it as follows, type checking within multiple Constant loops will no longer be necessary.

$flags = [];
$flagType = $scope->getType($flagArg->value);
foreach ($flagType->getConstantScalarValues() as $flag) {
    if (!is_int($flag)) {
        return new ErrorType();
    }

    $flags[] = $flag;
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

resolved 8cb3030


if (count($patternConstantTypes) === 0 || count($subjectConstantTypes) === 0) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this if-branch might be factored out into a private method for readability

$returnNonEmptyStrings = $flagArg !== null && $this->bitwiseFlagAnalyser->bitwiseOrContainsConstant($flagArg->value, $scope, 'PREG_SPLIT_NO_EMPTY')->yes();
if ($returnNonEmptyStrings) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would inline this variable, since it is used only once, to make the code easier to read

$returnStringType = TypeCombinator::intersect(
new StringType(),
new AccessoryNonEmptyStringType(),
);
} else {
$returnStringType = new StringType();
}

if ($flagsArg !== null && $this->bitwiseFlagAnalyser->bitwiseOrContainsConstant($flagsArg->value, $scope, 'PREG_SPLIT_OFFSET_CAPTURE')->yes()) {
$type = new ArrayType(
new IntegerType(),
new ConstantArrayType([new ConstantIntegerType(0), new ConstantIntegerType(1)], [new StringType(), IntegerRangeType::fromInterval(0, null)], [2], [], TrinaryLogic::createYes()),
$capturedArrayType = new ConstantArrayType(
[new ConstantIntegerType(0), new ConstantIntegerType(1)],
[$returnStringType, IntegerRangeType::fromInterval(0, null)],
[2],
[],
TrinaryLogic::createYes(),
);
return TypeCombinator::union(TypeCombinator::intersect($type, new AccessoryArrayListType()), new ConstantBooleanType(false));

$returnInternalValueType = $returnStringType;
if ($flagArg !== null) {
$flagState = $this->bitwiseFlagAnalyser->bitwiseOrContainsConstant($flagArg->value, $scope, 'PREG_SPLIT_OFFSET_CAPTURE');
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
$flagState = $this->bitwiseFlagAnalyser->bitwiseOrContainsConstant($flagArg->value, $scope, 'PREG_SPLIT_OFFSET_CAPTURE');
$capturesOffset = $this->bitwiseFlagAnalyser->bitwiseOrContainsConstant($flagArg->value, $scope, 'PREG_SPLIT_OFFSET_CAPTURE');

if ($flagState->yes()) {
$capturedArrayListType = TypeCombinator::intersect(
new ArrayType(new IntegerType(), $capturedArrayType),
new AccessoryArrayListType(),
);

if ($subjectType->isNonEmptyString()->yes()) {
$capturedArrayListType = TypeCombinator::intersect($capturedArrayListType, new NonEmptyArrayType());
}

return TypeCombinator::union($capturedArrayListType, new ConstantBooleanType(false));
}
if ($flagState->maybe()) {
$returnInternalValueType = TypeCombinator::union(new StringType(), $capturedArrayType);
}
}

$returnListType = TypeCombinator::intersect(new ArrayType(new MixedType(), $returnInternalValueType), new AccessoryArrayListType());
if ($subjectType->isNonEmptyString()->yes()) {
$returnListType = TypeCombinator::intersect(
$returnListType,
new NonEmptyArrayType(),
);
}

return TypeCombinator::union($returnListType, new ConstantBooleanType(false));
}

$resultTypes = [];
foreach ($patternConstantTypes as $patternConstantType) {
foreach ($subjectConstantTypes as $subjectConstantType) {
foreach ($limits as $limit) {
foreach ($flags as $flag) {
$result = @preg_split($patternConstantType->getValue(), $subjectConstantType->getValue(), $limit, $flag);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use Strings::split

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think using Strings::split here is not right because the limit is fixed to -1.

if ($result === false) {
continue;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if one of the values known at static-analysis time makes preg_split return false, we should give up instead of ignoring this fact

}
$constantArray = ConstantArrayTypeBuilder::createEmpty();
foreach ($result as $key => $value) {
if (is_array($value)) {
$valueConstantArray = ConstantArrayTypeBuilder::createEmpty();
$valueConstantArray->setOffsetValueType(new ConstantIntegerType(0), new ConstantStringType($value[0]));
$valueConstantArray->setOffsetValueType(new ConstantIntegerType(1), new ConstantIntegerType($value[1]));
$returnInternalValueType = $valueConstantArray->getArray();
} else {
$returnInternalValueType = new ConstantStringType($value);
}
$constantArray->setOffsetValueType(new ConstantIntegerType($key), $returnInternalValueType);
}

$resultTypes[] = $constantArray->getArray();
}
}
}
}

return null;
return TypeCombinator::union(...$resultTypes);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we are missing false. In the preg_match inference we decided that the result can be false even if all args are valid and known at static-analysis time, because a regex pattern might be super inefficient (or pattern-based attacks might trick the regex engine into returning false)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the above comment is still true and we are missing the false here

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, does this mean that every possible result of preg_split includes the possibility of false, and therefore we need to add false to the union type?

I had implemented it to return an Error if preg_split returns false, as a warning.
So, does this mean I should include false in all cases, instead of returning Error?

Copy link
Contributor

@staabm staabm Mar 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The return type of false at runtime is necessary because the preg_split call can fail even if we know everything IIRC.

The current "return ErrorType" could be turned into "return null" in case other rules will already report a phpstan error for the code examples.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have fixed that in the following commit.
307cf54

Additionally, since handling for the false case is no longer necessary, I have removed if ($result === false).

}

}
9 changes: 1 addition & 8 deletions tests/PHPStan/Analyser/AnalyserIntegrationTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
use PHPStan\Type\Constant\ConstantStringType;
use function extension_loaded;
use function restore_error_handler;
use function sprintf;
use const PHP_VERSION_ID;

class AnalyserIntegrationTest extends PHPStanTestCase
Expand Down Expand Up @@ -890,13 +889,7 @@ public function testBug7500(): void
public function testBug7554(): void
{
$errors = $this->runAnalyse(__DIR__ . '/data/bug-7554.php');
$this->assertCount(2, $errors);

$this->assertSame(sprintf('Parameter #1 $%s of function count expects array|Countable, list<array<int, int<0, max>|string>>|false given.', PHP_VERSION_ID < 80000 ? 'var' : 'value'), $errors[0]->getMessage());
$this->assertSame(26, $errors[0]->getLine());

$this->assertSame('Cannot access offset int<1, max> on list<array{string, int<0, max>}>|false.', $errors[1]->getMessage());
$this->assertSame(27, $errors[1]->getLine());
$this->assertCount(0, $errors);
}

public function testBug7637(): void
Expand Down
54 changes: 47 additions & 7 deletions tests/PHPStan/Analyser/nsrt/preg_split.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,50 @@ class HelloWorld
{
public function doFoo()
{
assertType('list<string>|false', preg_split('/-/', '1-2-3'));
assertType('list<string>|false', preg_split('/-/', '1-2-3', -1, PREG_SPLIT_NO_EMPTY));
assertType('list<array{string, int<0, max>}>|false', preg_split('/-/', '1-2-3', -1, PREG_SPLIT_OFFSET_CAPTURE));
assertType('list<array{string, int<0, max>}>|false', preg_split('/-/', '1-2-3', -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE));
assertType('*ERROR*', preg_split('/[0-9a]', '1-2-3'));
assertType("array{''}", preg_split('/-/', ''));
assertType("array{}", preg_split('/-/', '', -1, PREG_SPLIT_NO_EMPTY));
assertType("array{'1', '-', '2', '-', '3'}", preg_split('/ *(-) */', '1- 2-3', -1, PREG_SPLIT_DELIM_CAPTURE));
assertType("array{array{'', 0}}", preg_split('/-/', '', -1, PREG_SPLIT_OFFSET_CAPTURE));
assertType("array{}", preg_split('/-/', '', -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE));
assertType("array{'1', '2', '3'}", preg_split('/-/', '1-2-3'));
assertType("array{'1', '2', '3'}", preg_split('/-/', '1-2-3', -1, PREG_SPLIT_NO_EMPTY));
assertType("array{'1', '3'}", preg_split('/-/', '1--3', -1, PREG_SPLIT_NO_EMPTY));
assertType("array{array{'1', 0}, array{'2', 2}, array{'3', 4}}", preg_split('/-/', '1-2-3', -1, PREG_SPLIT_OFFSET_CAPTURE));
assertType("array{array{'1', 0}, array{'2', 2}, array{'3', 4}}", preg_split('/-/', '1-2-3', -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE));
assertType("array{array{'1', 0}, array{'', 2}, array{'3', 3}}", preg_split('/-/', '1--3', -1, PREG_SPLIT_OFFSET_CAPTURE));
assertType("array{array{'1', 0}, array{'3', 3}}", preg_split('/-/', '1--3', -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE));
}

/**
 * Asserts the inferred preg_split() return type when some or all arguments are not constant.
 *
 * Every expectation below includes false in the union.
 * NOTE(review): $offset is passed as the third argument of preg_split(), which the function map
 * names "limit" - the parameter name here looks like a misnomer; confirm before renaming.
 */
public function doWithVariables(string $pattern, string $subject, int $offset, int $flags): void
{
// Unknown $flags: elements are expected to be plain strings or [string, offset] pairs.
assertType('list<array{string, int<0, max>}|string>|false', preg_split($pattern, $subject, $offset, $flags));
// A constant pattern with a non-constant subject is expected to yield the same general type.
assertType('list<array{string, int<0, max>}|string>|false', preg_split("//", $subject, $offset, $flags));

// A constant non-empty subject with an unknown pattern: the list is expected to be non-empty.
assertType('non-empty-list<array{string, int<0, max>}|string>|false', preg_split($pattern, "1-2-3", $offset, $flags));
// A constant limit alone is not expected to narrow the type.
assertType('list<array{string, int<0, max>}|string>|false', preg_split($pattern, $subject, -1, $flags));
// PREG_SPLIT_NO_EMPTY: elements are expected to be non-empty-string.
assertType('list<non-empty-string>|false', preg_split($pattern, $subject, $offset, PREG_SPLIT_NO_EMPTY));
// PREG_SPLIT_OFFSET_CAPTURE: every element is expected to be a [string, offset] pair.
assertType('list<array{string, int<0, max>}>|false', preg_split($pattern, $subject, $offset, PREG_SPLIT_OFFSET_CAPTURE));
// PREG_SPLIT_DELIM_CAPTURE on its own is expected to keep the element type as string.
assertType("list<string>|false", preg_split($pattern, $subject, $offset, PREG_SPLIT_DELIM_CAPTURE));
// Combined with OFFSET_CAPTURE, the pair shape is expected.
assertType('list<array{string, int<0, max>}>|false', preg_split($pattern, $subject, $offset, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE));
}

/**
* Asserts the inferred preg_split() return type when the subject is a non-constant non-empty-string.
*
* Every expectation below is a non-empty-list unioned with false.
*
* @param non-empty-string $nonEmptySubject
*/
public function doWithNonEmptySubject(string $pattern, string $nonEmptySubject, int $offset, int $flags): void
{
// No limit and no flags: plain string elements are expected.
assertType('non-empty-list<string>|false', preg_split("//", $nonEmptySubject));

// Unknown $flags: elements are expected to be plain strings or [string, offset] pairs.
assertType('non-empty-list<array{string, int<0, max>}|string>|false', preg_split($pattern, $nonEmptySubject, $offset, $flags));
assertType('non-empty-list<array{string, int<0, max>}|string>|false', preg_split("//", $nonEmptySubject, $offset, $flags));

// Constant flags: the element type is expected to follow the OFFSET_CAPTURE / NO_EMPTY combination.
assertType('non-empty-list<array{string, int<0, max>}>|false', preg_split("/-/", $nonEmptySubject, $offset, PREG_SPLIT_OFFSET_CAPTURE));
assertType('non-empty-list<non-empty-string>|false', preg_split("/-/", $nonEmptySubject, $offset, PREG_SPLIT_NO_EMPTY));
assertType('non-empty-list<string>|false', preg_split("/-/", $nonEmptySubject, $offset, PREG_SPLIT_DELIM_CAPTURE));
assertType('non-empty-list<array{string, int<0, max>}>|false', preg_split("/-/", $nonEmptySubject, $offset, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE));
// NO_EMPTY together with OFFSET_CAPTURE: the string inside each pair is expected to be non-empty.
assertType('non-empty-list<array{non-empty-string, int<0, max>}>|false', preg_split("/-/", $nonEmptySubject, $offset, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE));
assertType('non-empty-list<non-empty-string>|false', preg_split("/-/", $nonEmptySubject, $offset, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE));
}

/**
Expand All @@ -26,16 +66,16 @@ public static function splitWithOffset($pattern, $subject, $limit = -1, $flags =
{
assertType('list<array{string, int<0, max>}>|false', preg_split($pattern, $subject, $limit, $flags | PREG_SPLIT_OFFSET_CAPTURE));
assertType('list<array{string, int<0, max>}>|false', preg_split($pattern, $subject, $limit, PREG_SPLIT_OFFSET_CAPTURE | $flags));

assertType('list<array{string, int<0, max>}>|false', preg_split($pattern, $subject, $limit, PREG_SPLIT_OFFSET_CAPTURE | $flags | PREG_SPLIT_NO_EMPTY));
assertType('list<array{non-empty-string, int<0, max>}>|false', preg_split($pattern, $subject, $limit, PREG_SPLIT_OFFSET_CAPTURE | $flags | PREG_SPLIT_NO_EMPTY));
}

/**
* @param string $pattern
* @param string $subject
* @param int $limit
*/
public static function dynamicFlags($pattern, $subject, $limit = -1) {
public static function dynamicFlags($pattern, $subject, $limit = -1)
{
$flags = PREG_SPLIT_OFFSET_CAPTURE;

if ($subject === '1-2-3') {
Expand Down
Loading