throw exceptions instead of triggering warnings

splitbrain · splitbrain · commit 4a85ee9f2933 · 2023-03-11T23:14:03.000+01:00
diff --git a/src/Exception.php b/src/Exception.php
@@ -0,0 +1,7 @@
+<?php
+
+namespace splitbrain\JSStrip;
+
+class Exception extends \Exception
+{
+}
diff --git a/src/JSStrip.php b/src/JSStrip.php
@@ -30,49 +30,59 @@ class JSStrip
      */
     const OPS = "+-/";
 
+    protected $source;
+    protected $idx = 0;
+    protected $line = 0;
+
     /**
      * Compress the given code
-     * 
+     *
      * @param string $source The JavaScript code to compress
      * @return string
+     * @throws Exception if parsing fails
      */
     function compress($source)
     {
         $source = ltrim($source);     // strip all initial whitespace
         $source .= "\n";
-        $i = 0;             // char index for input string
+        $idx = 0;             // char index for input string
+
+        // track these as member variables
+        $this->source = $source;
+        $this->line = 1;
+        $this->idx = &$idx;
+
         $j = 0;             // char forward index for input string
-        $line = 0;          // line number of file (close to it anyways)
         $slen = strlen($source); // size of input string
         $lch = '';         // last char added
         $result = '';       // we store the final result here
 
 
-        while ($i < $slen) {
+        while ($idx < $slen) {
             // skip all "boring" characters.  This is either
             // reserved word (e.g. "for", "else", "if") or a
             // variable/object/method (e.g. "foo.color")
-            while ($i < $slen && (strpos(self::CHARS, $source[$i]) === false)) {
-                $result .= $source[$i];
-                $i = $i + 1;
+            while ($idx < $slen && (strpos(self::CHARS, $source[$idx]) === false)) {
+                $result .= $source[$idx];
+                $idx = $idx + 1;
             }
 
-            $ch = $source[$i];
+            $ch = $source[$idx];
             // multiline comments (keeping IE conditionals)
-            if ($ch == '/' && $source[$i + 1] == '*' && $source[$i + 2] != '@') {
-                $endC = strpos($source, '*/', $i + 2);
-                if ($endC === false) trigger_error('Found invalid /*..*/ comment', E_USER_ERROR);
+            if ($ch == '/' && $source[$idx + 1] == '*' && $source[$idx + 2] != '@') {
+                $endC = strpos($source, '*/', $idx + 2);
+                if ($endC === false) $this->fatal('Found invalid /*..*/ comment');
 
                 // check if this is a NOCOMPRESS comment
-                if (substr($source, $i, $endC + 2 - $i) == '/* BEGIN NOCOMPRESS */') {
+                if (substr($source, $idx, $endC + 2 - $idx) == '/* BEGIN NOCOMPRESS */') {
                     // take nested NOCOMPRESS comments into account
                     $depth = 0;
                     $nextNC = $endC;
                     do {
                         $beginNC = strpos($source, '/* BEGIN NOCOMPRESS */', $nextNC + 2);
                         $endNC = strpos($source, '/* END NOCOMPRESS */', $nextNC + 2);
 
-                        if ($endNC === false) trigger_error('Found invalid NOCOMPRESS comment', E_USER_ERROR);
+                        if ($endNC === false) $this->fatal('Found invalid NOCOMPRESS comment');
                         if ($beginNC !== false && $beginNC < $endNC) {
                             $depth++;
                             $nextNC = $beginNC;
@@ -83,34 +93,34 @@ function compress($source)
                     } while ($depth >= 0);
 
                     // verbatim copy contents, trimming but putting it on its own line
-                    $result .= "\n" . trim(substr($source, $i + 22, $endNC - ($i + 22))) . "\n"; // BEGIN comment = 22 chars
-                    $i = $endNC + 20; // END comment = 20 chars
+                    $result .= "\n" . trim(substr($source, $idx + 22, $endNC - ($idx + 22))) . "\n"; // BEGIN comment = 22 chars
+                    $idx = $endNC + 20; // END comment = 20 chars
                 } else {
-                    $i = $endC + 2;
+                    $idx = $endC + 2;
                 }
                 continue;
             }
 
             // singleline
-            if ($ch == '/' && $source[$i + 1] == '/') {
-                $endC = strpos($source, "\n", $i + 2);
-                if ($endC === false) trigger_error('Invalid comment', E_USER_ERROR);
-                $i = $endC;
+            if ($ch == '/' && $source[$idx + 1] == '/') {
+                $endC = strpos($source, "\n", $idx + 2);
+                if ($endC === false) $this->fatal('Invalid comment'); // not sure this can happen
+                $idx = $endC;
                 continue;
             }
 
             // tricky.  might be an RE
             if ($ch == '/') {
                 // rewind, skip white space
                 $j = 1;
-                while (in_array($source[$i - $j], self::WHITESPACE_CHARS)) {
+                while (in_array($source[$idx - $j], self::WHITESPACE_CHARS)) {
                     $j = $j + 1;
                 }
                 if (current(array_filter(
                     self::REGEX_STARTERS,
-                    function ($e) use ($source, $i, $j) {
+                    function ($e) use ($source, $idx, $j) {
                         $len = strlen($e);
-                        $idx = $i - $j + 1 - $len;
+                        $idx = $idx - $j + 1 - $len;
                         return substr($source, $idx, $len) === $e;
                     }
                 ))) {
@@ -119,94 +129,95 @@ function ($e) use ($source, $i, $j) {
                     $j = 1;
                     // we set this flag when inside a character class definition, enclosed by brackets [] where '/' does not terminate the re
                     $ccd = false;
-                    while ($ccd || $source[$i + $j] != '/') {
-                        if ($source[$i + $j] == '\\') $j = $j + 2;
+                    while ($ccd || $source[$idx + $j] != '/') {
+                        if ($source[$idx + $j] == '\\') $j = $j + 2;
                         else {
                             $j++;
                             // check if we entered/exited a character class definition and set flag accordingly
-                            if ($source[$i + $j - 1] == '[') $ccd = true;
-                            else if ($source[$i + $j - 1] == ']') $ccd = false;
+                            if ($source[$idx + $j - 1] == '[') $ccd = true;
+                            else if ($source[$idx + $j - 1] == ']') $ccd = false;
                         }
                     }
-                    $result .= substr($source, $i, $j + 1);
-                    $i = $i + $j + 1;
+                    $result .= substr($source, $idx, $j + 1);
+                    $idx = $idx + $j + 1;
                     continue;
                 }
             }
 
             // double quote strings
             if ($ch == '"') {
                 $j = 1;
-                while (($i + $j < $slen) && $source[$i + $j] != '"') {
-                    if ($source[$i + $j] == '\\' && ($source[$i + $j + 1] == '"' || $source[$i + $j + 1] == '\\')) {
+                while (($idx + $j < $slen) && $source[$idx + $j] != '"') {
+                    if ($source[$idx + $j] == '\\' && ($source[$idx + $j + 1] == '"' || $source[$idx + $j + 1] == '\\')) {
                         $j += 2;
                     } else {
                         $j += 1;
                     }
                 }
-                $string = substr($source, $i, $j + 1);
+                $string = substr($source, $idx, $j + 1);
                 // remove multiline markers:
                 $string = str_replace("\\\n", '', $string);
                 $result .= $string;
-                $i = $i + $j + 1;
+                $idx = $idx + $j + 1;
                 continue;
             }
 
             // single quote strings
             if ($ch == "'") {
                 $j = 1;
-                while (($i + $j < $slen) && $source[$i + $j] != "'") {
-                    if ($source[$i + $j] == '\\' && ($source[$i + $j + 1] == "'" || $source[$i + $j + 1] == '\\')) {
+                while (($idx + $j < $slen) && $source[$idx + $j] != "'") {
+                    if ($source[$idx + $j] == '\\' && ($source[$idx + $j + 1] == "'" || $source[$idx + $j + 1] == '\\')) {
                         $j += 2;
                     } else {
                         $j += 1;
                     }
                 }
-                $string = substr($source, $i, $j + 1);
+                $string = substr($source, $idx, $j + 1);
                 // remove multiline markers:
                 $string = str_replace("\\\n", '', $string);
                 $result .= $string;
-                $i = $i + $j + 1;
+                $idx = $idx + $j + 1;
                 continue;
             }
 
             // backtick strings
             if ($ch == "`") {
                 $j = 1;
-                while (($i + $j < $slen) && $source[$i + $j] != "`") {
-                    if ($source[$i + $j] == '\\' && ($source[$i + $j + 1] == "`" || $source[$i + $j + 1] == '\\')) {
+                while (($idx + $j < $slen) && $source[$idx + $j] != "`") {
+                    if ($source[$idx + $j] == '\\' && ($source[$idx + $j + 1] == "`" || $source[$idx + $j + 1] == '\\')) {
                         $j += 2;
                     } else {
                         $j += 1;
                     }
                 }
-                $string = substr($source, $i, $j + 1);
+                $string = substr($source, $idx, $j + 1);
                 // remove multiline markers:
                 $string = str_replace("\\\n", '', $string);
                 $result .= $string;
-                $i = $i + $j + 1;
+                $idx = $idx + $j + 1;
                 continue;
             }
 
             // whitespaces
             if ($ch == ' ' || $ch == "\r" || $ch == "\n" || $ch == "\t") {
                 $lch = substr($result, -1);
+                if ($ch == "\n") $this->line++;
 
                 // Only consider deleting whitespace if the signs before and after
                 // are not equal and are not an operator which may not follow itself.
-                if ($i + 1 < $slen && ((!$lch || $source[$i + 1] == ' ')
-                        || $lch != $source[$i + 1]
-                        || strpos(self::OPS, $source[$i + 1]) === false)) {
+                if ($idx + 1 < $slen && ((!$lch || $source[$idx + 1] == ' ')
+                        || $lch != $source[$idx + 1]
+                        || strpos(self::OPS, $source[$idx + 1]) === false)) {
                     // leading spaces
-                    if ($i + 1 < $slen && (strpos(self::CHARS, $source[$i + 1]) !== false)) {
-                        $i = $i + 1;
+                    if ($idx + 1 < $slen && (strpos(self::CHARS, $source[$idx + 1]) !== false)) {
+                        $idx = $idx + 1;
                         continue;
                     }
                     // trailing spaces
                     //  if this ch is space AND the last char processed
                     //  is special, then skip the space
                     if ($lch && (strpos(self::CHARS, $lch) !== false)) {
-                        $i = $i + 1;
+                        $idx = $idx + 1;
                         continue;
                     }
                 }
@@ -218,9 +229,26 @@ function ($e) use ($source, $i, $j) {
 
             // other chars
             $result .= $ch;
-            $i = $i + 1;
+            $idx = $idx + 1;
         }
 
         return trim($result);
     }
+
+    /**
+     * Helper to throw a fatal error
+     *
+     * Tries to give some context to locate the error
+     *
+     * @param string $msg
+     * @throws Exception
+     */
+    protected function fatal($msg)
+    {
+        $before = substr($this->source, max(0, $this->idx - 15), $this->idx);
+        $after = substr($this->source, $this->idx, 15);
+
+        $msg = "$msg on line {$this->line}: '{$before}◎{$after}'";
+        throw new Exception($msg);
+    }
 }
diff --git a/tests/JSStripTest.php b/tests/JSStripTest.php
@@ -3,6 +3,7 @@
 namespace splitbrain\JSStrip\tests;
 
 use PHPUnit\Framework\TestCase;
+use splitbrain\JSStrip\Exception;
 use splitbrain\JSStrip\JSStrip;
 
 class JSStripTest extends TestCase
@@ -132,8 +133,31 @@ public function provideFileData()
      * @param string $output
      * @param string $file
      */
-    function testFileData($input, $output, $file)
+    public function testFileData($input, $output, $file)
     {
         $this->assertEquals($output, (new JSStrip())->compress($input), $file);
     }
+
+    /**
+     * Test cases that should throw an exception
+     *
+     * @return array[] [input]
+     * @see testException
+     */
+    public function provideException() {
+        return [
+            ['foo /* BEGIN NOCOMPRESS */ we never end this block'],
+            ["/* we never end this comment"],
+        ];
+    }
+
+    /**
+     * @dataProvider provideException
+     * @param string $input
+     */
+    public function testException($input)
+    {
+        $this->expectException(Exception::class);
+        (new JSStrip())->compress($input);
+    }
 }

-Original file line number
+Diff line change
@@ @@ -0,0 +1,7 @@ @@
 +<?php
++
 +namespace splitbrain\JSStrip;
++
 +class Exception extends \Exception
 +{
 +}