extract constants from compress method

splitbrain · splitbrain · commit bac3bfe0d94a · 2023-03-11T22:40:00.000+01:00
A first start to somewhat shorten that ultralong method
diff --git a/src/JSStrip.php b/src/JSStrip.php
@@ -15,54 +15,62 @@
 class JSStrip
 {
 
+    const REGEX_STARTERS = [
+        '(', '=', '<', '>', '?', '[', '{', ',', ';', ':', '!', '&', '|', '+', '-', '%', '~', '^',
+        'return', 'yield', 'else', 'throw', 'await'
+    ];
+    const WHITESPACE_CHARS = [" ", "\t", "\n", "\r", "\0", "\x0B"];
+
+    /** items that don't need spaces next to them */
+    const CHARS = "^&|!+\-*\/%=\?:;,{}()<>% \t\n\r'\"`[]~^";
+
     /**
-     * @param string $s
+     * items which need a space if the sign before and after whitespace is equal.
+     * E.g. '+ ++' may not be compressed to '+++' --> syntax error.
+     */
+    const OPS = "+-/";
+
+    /**
+     * Compress the given code
+     * 
+     * @param string $source The JavaScript code to compress
      * @return string
      */
-    function compress($s)
+    function compress($source)
     {
-        $s = ltrim($s);     // strip all initial whitespace
-        $s .= "\n";
+        $source = ltrim($source);     // strip all initial whitespace
+        $source .= "\n";
         $i = 0;             // char index for input string
         $j = 0;             // char forward index for input string
         $line = 0;          // line number of file (close to it anyways)
-        $slen = strlen($s); // size of input string
+        $slen = strlen($source); // size of input string
         $lch = '';         // last char added
         $result = '';       // we store the final result here
 
-        // items that don't need spaces next to them
-        $chars = "^&|!+\-*\/%=\?:;,{}()<>% \t\n\r'\"`[]~^";
-
-        // items which need a space if the sign before and after whitespace is equal.
-        // E.g. '+ ++' may not be compressed to '+++' --> syntax error.
-        $ops = "+-/";
-
-        $regex_starters = array("(", "=", "<", ">", "?", "[", "{", ",", ";", ":", "!", "&", "|", "+", "-", "%", "~", "^", "return", "yield", "else", "throw", "await");
-        $whitespaces_chars = array(" ", "\t", "\n", "\r", "\0", "\x0B");
 
         while ($i < $slen) {
             // skip all "boring" characters.  This is either
             // reserved word (e.g. "for", "else", "if") or a
             // variable/object/method (e.g. "foo.color")
-            while ($i < $slen && (strpos($chars, $s[$i]) === false)) {
-                $result .= $s[$i];
+            while ($i < $slen && (strpos(self::CHARS, $source[$i]) === false)) {
+                $result .= $source[$i];
                 $i = $i + 1;
             }
 
-            $ch = $s[$i];
+            $ch = $source[$i];
             // multiline comments (keeping IE conditionals)
-            if ($ch == '/' && $s[$i + 1] == '*' && $s[$i + 2] != '@') {
-                $endC = strpos($s, '*/', $i + 2);
+            if ($ch == '/' && $source[$i + 1] == '*' && $source[$i + 2] != '@') {
+                $endC = strpos($source, '*/', $i + 2);
                 if ($endC === false) trigger_error('Found invalid /*..*/ comment', E_USER_ERROR);
 
                 // check if this is a NOCOMPRESS comment
-                if (substr($s, $i, $endC + 2 - $i) == '/* BEGIN NOCOMPRESS */') {
+                if (substr($source, $i, $endC + 2 - $i) == '/* BEGIN NOCOMPRESS */') {
                     // take nested NOCOMPRESS comments into account
                     $depth = 0;
                     $nextNC = $endC;
                     do {
-                        $beginNC = strpos($s, '/* BEGIN NOCOMPRESS */', $nextNC + 2);
-                        $endNC = strpos($s, '/* END NOCOMPRESS */', $nextNC + 2);
+                        $beginNC = strpos($source, '/* BEGIN NOCOMPRESS */', $nextNC + 2);
+                        $endNC = strpos($source, '/* END NOCOMPRESS */', $nextNC + 2);
 
                         if ($endNC === false) trigger_error('Found invalid NOCOMPRESS comment', E_USER_ERROR);
                         if ($beginNC !== false && $beginNC < $endNC) {
@@ -75,7 +83,7 @@ function compress($s)
                     } while ($depth >= 0);
 
                     // verbatim copy contents, trimming but putting it on its own line
-                    $result .= "\n" . trim(substr($s, $i + 22, $endNC - ($i + 22))) . "\n"; // BEGIN comment = 22 chars
+                    $result .= "\n" . trim(substr($source, $i + 22, $endNC - ($i + 22))) . "\n"; // BEGIN comment = 22 chars
                     $i = $endNC + 20; // END comment = 20 chars
                 } else {
                     $i = $endC + 2;
@@ -84,8 +92,8 @@ function compress($s)
             }
 
             // singleline
-            if ($ch == '/' && $s[$i + 1] == '/') {
-                $endC = strpos($s, "\n", $i + 2);
+            if ($ch == '/' && $source[$i + 1] == '/') {
+                $endC = strpos($source, "\n", $i + 2);
                 if ($endC === false) trigger_error('Invalid comment', E_USER_ERROR);
                 $i = $endC;
                 continue;
@@ -95,32 +103,32 @@ function compress($s)
             if ($ch == '/') {
                 // rewind, skip white space
                 $j = 1;
-                while (in_array($s[$i - $j], $whitespaces_chars)) {
+                while (in_array($source[$i - $j], self::WHITESPACE_CHARS)) {
                     $j = $j + 1;
                 }
                 if (current(array_filter(
-                    $regex_starters,
-                    function ($e) use ($s, $i, $j) {
+                    self::REGEX_STARTERS,
+                    function ($e) use ($source, $i, $j) {
                         $len = strlen($e);
                         $idx = $i - $j + 1 - $len;
-                        return substr($s, $idx, $len) === $e;
+                        return substr($source, $idx, $len) === $e;
                     }
                 ))) {
                     // yes, this is an re
                     // now move forward and find the end of it
                     $j = 1;
                     // we set this flag when inside a character class definition, enclosed by brackets [] where '/' does not terminate the re
                     $ccd = false;
-                    while ($ccd || $s[$i + $j] != '/') {
-                        if ($s[$i + $j] == '\\') $j = $j + 2;
+                    while ($ccd || $source[$i + $j] != '/') {
+                        if ($source[$i + $j] == '\\') $j = $j + 2;
                         else {
                             $j++;
                             // check if we entered/exited a character class definition and set flag accordingly
-                            if ($s[$i + $j - 1] == '[') $ccd = true;
-                            else if ($s[$i + $j - 1] == ']') $ccd = false;
+                            if ($source[$i + $j - 1] == '[') $ccd = true;
+                            else if ($source[$i + $j - 1] == ']') $ccd = false;
                         }
                     }
-                    $result .= substr($s, $i, $j + 1);
+                    $result .= substr($source, $i, $j + 1);
                     $i = $i + $j + 1;
                     continue;
                 }
@@ -129,14 +137,14 @@ function ($e) use ($s, $i, $j) {
             // double quote strings
             if ($ch == '"') {
                 $j = 1;
-                while (($i + $j < $slen) && $s[$i + $j] != '"') {
-                    if ($s[$i + $j] == '\\' && ($s[$i + $j + 1] == '"' || $s[$i + $j + 1] == '\\')) {
+                while (($i + $j < $slen) && $source[$i + $j] != '"') {
+                    if ($source[$i + $j] == '\\' && ($source[$i + $j + 1] == '"' || $source[$i + $j + 1] == '\\')) {
                         $j += 2;
                     } else {
                         $j += 1;
                     }
                 }
-                $string = substr($s, $i, $j + 1);
+                $string = substr($source, $i, $j + 1);
                 // remove multiline markers:
                 $string = str_replace("\\\n", '', $string);
                 $result .= $string;
@@ -147,14 +155,14 @@ function ($e) use ($s, $i, $j) {
             // single quote strings
             if ($ch == "'") {
                 $j = 1;
-                while (($i + $j < $slen) && $s[$i + $j] != "'") {
-                    if ($s[$i + $j] == '\\' && ($s[$i + $j + 1] == "'" || $s[$i + $j + 1] == '\\')) {
+                while (($i + $j < $slen) && $source[$i + $j] != "'") {
+                    if ($source[$i + $j] == '\\' && ($source[$i + $j + 1] == "'" || $source[$i + $j + 1] == '\\')) {
                         $j += 2;
                     } else {
                         $j += 1;
                     }
                 }
-                $string = substr($s, $i, $j + 1);
+                $string = substr($source, $i, $j + 1);
                 // remove multiline markers:
                 $string = str_replace("\\\n", '', $string);
                 $result .= $string;
@@ -165,14 +173,14 @@ function ($e) use ($s, $i, $j) {
             // backtick strings
             if ($ch == "`") {
                 $j = 1;
-                while (($i + $j < $slen) && $s[$i + $j] != "`") {
-                    if ($s[$i + $j] == '\\' && ($s[$i + $j + 1] == "`" || $s[$i + $j + 1] == '\\')) {
+                while (($i + $j < $slen) && $source[$i + $j] != "`") {
+                    if ($source[$i + $j] == '\\' && ($source[$i + $j + 1] == "`" || $source[$i + $j + 1] == '\\')) {
                         $j += 2;
                     } else {
                         $j += 1;
                     }
                 }
-                $string = substr($s, $i, $j + 1);
+                $string = substr($source, $i, $j + 1);
                 // remove multiline markers:
                 $string = str_replace("\\\n", '', $string);
                 $result .= $string;
@@ -186,18 +194,18 @@ function ($e) use ($s, $i, $j) {
 
                 // Only consider deleting whitespace if the signs before and after
                 // are not equal and are not an operator which may not follow itself.
-                if ($i + 1 < $slen && ((!$lch || $s[$i + 1] == ' ')
-                        || $lch != $s[$i + 1]
-                        || strpos($ops, $s[$i + 1]) === false)) {
+                if ($i + 1 < $slen && ((!$lch || $source[$i + 1] == ' ')
+                        || $lch != $source[$i + 1]
+                        || strpos(self::OPS, $source[$i + 1]) === false)) {
                     // leading spaces
-                    if ($i + 1 < $slen && (strpos($chars, $s[$i + 1]) !== false)) {
+                    if ($i + 1 < $slen && (strpos(self::CHARS, $source[$i + 1]) !== false)) {
                         $i = $i + 1;
                         continue;
                     }
                     // trailing spaces
                     //  if this ch is space AND the last char processed
                     //  is special, then skip the space
-                    if ($lch && (strpos($chars, $lch) !== false)) {
+                    if ($lch && (strpos(self::CHARS, $lch) !== false)) {
                         $i = $i + 1;
                         continue;
                     }