Skip to content

String#extractScripts, stripScripts performance optimization #146

@jwestbrook

Description

@jwestbrook

[previous lighthouse ticket #431](https://prototype.lighthouseapp.com/projects/8886/tickets/431-stringextractscripts-stripscripts-performance-optimization)
by MIYAMUKO Katsuyuki


`new RegExp` is a heavy operation in Firefox.

The attached patch avoids recompiling the regexp on each method call.

Performance
1000 iterations of String#extractScripts.

IE7
0.453 => 0.172
Firefox3
2.012 => 0.046
Chrome
0.075 => 0.029
Safari3
0.063 => 0.016
Opera9
0.250 => 0.125

diff --git a/src/string.js b/src/string.js
index 58f79c2..8cd4b48 100644
--- a/src/string.js
+++ b/src/string.js
@@ -13,6 +13,8 @@ Object.extend(String, {
 });

 Object.extend(String.prototype, {
+  _scriptFragmentRegexp: new RegExp(Prototype.ScriptFragment, 'img'),
+
   gsub: function(pattern, replacement) {
     var result = '', source = this, match;
     replacement = arguments.callee.prepareReplacement(replacement);
@@ -60,15 +62,18 @@ Object.extend(String.prototype, {
   },

   stripScripts: function() {
-    return this.replace(new RegExp(Prototype.ScriptFragment, 'img'), '');
+    return this.replace(this._scriptFragmentRegexp, '');
   },

   extractScripts: function() {
-    var matchAll = new RegExp(Prototype.ScriptFragment, 'img');
-    var matchOne = new RegExp(Prototype.ScriptFragment, 'im');
-    return (this.match(matchAll) || []).map(function(scriptTag) {
-      return (scriptTag.match(matchOne) || ['', ''])[1];
-    });
+    var re = this._scriptFragmentRegexp;
+    var r = [];
+    while (true) {
+      var m = re.exec(this);
+      if (!m) break;
+      r.push(m[1]);
+    }
+    return r;
   },

   evalScripts: function() {
diff --git a/test/unit/string_test.js b/test/unit/string_test.js
index ab20366..42e0736 100644
--- a/test/unit/string_test.js
+++ b/test/unit/string_test.js
@@ -205,6 +205,12 @@ new Test.Unit.Runner({
       ('foo <script>boo();<'+'/script><script type="text/javascript">boo();\nmoo();<'+'/script>bar').extractScripts());
     this.assertEnumEqual(['boo();','boo();\nmoo();'], 
       ('foo <script>boo();<'+'/script>blub\nblub<script type="text/javascript">boo();\nmoo();<'+'/script>bar').extractScripts());
+
+    var str = 'foo <script>boo();<'+'/script>blub\nblub<script type="text/javascript">boo();\nmoo();<'+'/script>bar';
+    this.assertEnumEqual(['boo();','boo();\nmoo();'], str.extractScripts());
+    this.assertEnumEqual(['boo();','boo();\nmoo();'], str.extractScripts());
+
+    this.benchmark(function() { str.extractScripts() }, 1000);
   },

   testEvalScripts: function() {

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions