rust-lang · Apr 25, 2014
diff --git a/‎mk/crates.mk
Lines changed: 4 additions & 2 deletions b/‎mk/crates.mk
Lines changed: 4 additions & 2 deletions
diff --git a/‎mk/main.mk
Lines changed: 1 addition & 4 deletions b/‎mk/main.mk
Lines changed: 1 addition & 4 deletions
diff --git a/‎src/README.md
Lines changed: 1 addition & 0 deletions b/‎src/README.md
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/doc/index.md
Lines changed: 1 addition & 0 deletions b/‎src/doc/index.md
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/etc/regex-match-tests.py
Lines changed: 109 additions & 0 deletions b/‎src/etc/regex-match-tests.py
Lines changed: 109 additions & 0 deletions
diff --git a/‎src/etc/regex-unicode-tables.py
Lines changed: 183 additions & 0 deletions b/‎src/etc/regex-unicode-tables.py
Lines changed: 183 additions & 0 deletions
diff --git a/‎src/libregex/compile.rs
Lines changed: 274 additions & 0 deletions b/‎src/libregex/compile.rs
Lines changed: 274 additions & 0 deletions
diff --git a/‎src/libregex/lib.rs
Lines changed: 426 additions & 0 deletions b/‎src/libregex/lib.rs
Lines changed: 426 additions & 0 deletions
diff --git a/‎src/libregex/parse.rs
Lines changed: 1028 additions & 0 deletions b/‎src/libregex/parse.rs
Lines changed: 1028 additions & 0 deletions
diff --git a/‎src/libregex/re.rs
Lines changed: 870 additions & 0 deletions b/‎src/libregex/re.rs
Lines changed: 870 additions & 0 deletions
diff --git a/‎src/libregex/test/bench.rs
Lines changed: 179 additions & 0 deletions b/‎src/libregex/test/bench.rs
Lines changed: 179 additions & 0 deletions
diff --git a/‎src/libregex/test/matches.rs
Lines changed: 373 additions & 0 deletions b/‎src/libregex/test/matches.rs
Lines changed: 373 additions & 0 deletions
diff --git a/‎src/libregex/test/mod.rs
Lines changed: 29 additions & 0 deletions b/‎src/libregex/test/mod.rs
Lines changed: 29 additions & 0 deletions
diff --git a/‎src/libregex/test/tests.rs
Lines changed: 199 additions & 0 deletions b/‎src/libregex/test/tests.rs
Lines changed: 199 additions & 0 deletions
diff --git a/‎src/libregex/testdata/LICENSE
Lines changed: 19 additions & 0 deletions b/‎src/libregex/testdata/LICENSE
Lines changed: 19 additions & 0 deletions
diff --git a/‎src/libregex/testdata/README
Lines changed: 17 additions & 0 deletions b/‎src/libregex/testdata/README
Lines changed: 17 additions & 0 deletions
diff --git a/‎src/libregex/testdata/basic.dat
Lines changed: 221 additions & 0 deletions b/‎src/libregex/testdata/basic.dat
Lines changed: 221 additions & 0 deletions
diff --git a/‎src/libregex/testdata/nullsubexpr.dat
Lines changed: 79 additions & 0 deletions b/‎src/libregex/testdata/nullsubexpr.dat
Lines changed: 79 additions & 0 deletions
diff --git a/‎src/libregex/testdata/repetition.dat
Lines changed: 163 additions & 0 deletions b/‎src/libregex/testdata/repetition.dat
Lines changed: 163 additions & 0 deletions
diff --git a/‎src/libregex/unicode.rs
Lines changed: 5537 additions & 0 deletions b/‎src/libregex/unicode.rs
Lines changed: 5537 additions & 0 deletions
diff --git a/‎src/libregex/vm.rs
Lines changed: 587 additions & 0 deletions b/‎src/libregex/vm.rs
Lines changed: 587 additions & 0 deletions
diff --git a/‎src/libregex_macros/lib.rs
Lines changed: 684 additions & 0 deletions b/‎src/libregex_macros/lib.rs
Lines changed: 684 additions & 0 deletions
diff --git a/‎src/test/bench/shootout-regex-dna.rs
Lines changed: 96 additions & 0 deletions b/‎src/test/bench/shootout-regex-dna.rs
Lines changed: 96 additions & 0 deletions
diff --git a/‎src/test/compile-fail/syntax-extension-regex-invalid.rs
Lines changed: 28 additions & 0 deletions b/‎src/test/compile-fail/syntax-extension-regex-invalid.rs
Lines changed: 28 additions & 0 deletions
@@ -51,8 +51,8 @@
 
 TARGET_CRATES := libc std green rustuv native flate arena glob term semver \
                  uuid serialize sync getopts collections num test time rand \
-		 workcache url log
-HOST_CRATES := syntax rustc rustdoc fourcc hexfloat
+		 workcache url log regex
+HOST_CRATES := syntax rustc rustdoc fourcc hexfloat regex_macros
 CRATES := $(TARGET_CRATES) $(HOST_CRATES)
 TOOLS := compiletest rustdoc rustc
 
@@ -84,6 +84,8 @@ DEPS_rand := std
 DEPS_url := std collections
 DEPS_workcache := std serialize collections log
 DEPS_log := std sync
+DEPS_regex := std collections
+# regex_macros is a host (syntax-phase) crate; `:=` matches every other DEPS_*.
+DEPS_regex_macros := syntax std regex
 
 TOOL_DEPS_compiletest := test green rustuv getopts
 TOOL_DEPS_rustdoc := rustdoc native
 
@@ -311,8 +311,6 @@ HSREQ$(1)_H_$(3) = $$(HBIN$(1)_H_$(3))/rustc$$(X_$(3))
 else
 HSREQ$(1)_H_$(3) = \
 	$$(HBIN$(1)_H_$(3))/rustc$$(X_$(3)) \
-	$$(HLIB$(1)_H_$(3))/stamp.rustc \
-	$$(foreach dep,$$(RUST_DEPS_rustc),$$(HLIB$(1)_H_$(3))/stamp.$$(dep)) \
 	$$(MKFILE_DEPS)
 endif
 
@@ -334,8 +332,7 @@ SREQ$(1)_T_$(2)_H_$(3) = \
 CSREQ$(1)_T_$(2)_H_$(3) = \
 	$$(TSREQ$(1)_T_$(2)_H_$(3)) \
 	$$(HBIN$(1)_H_$(3))/rustdoc$$(X_$(3)) \
-	$$(foreach dep,$$(CRATES),$$(TLIB$(1)_T_$(2)_H_$(3))/stamp.$$(dep)) \
-	$$(foreach dep,$$(HOST_CRATES),$$(HLIB$(1)_H_$(3))/stamp.$$(dep))
+	$$(foreach dep,$$(CRATES),$$(TLIB$(1)_T_$(2)_H_$(3))/stamp.$$(dep))
 
 ifeq ($(1),0)
 # Don't run the stage0 compiler under valgrind - that ship has sailed
 
@@ -19,6 +19,7 @@ Source layout:
 | `libfourcc/`        | Data format identifier library                            |
 | `libgetopts/`       | Get command-line-options library                          |
 | `libglob/`          | Unix glob patterns library                                |
+| `libregex/`         | Regular expressions                                       |
 | `libsemver/`        | Rust's semantic versioning library                        |
 | `libserialize/`     | Encode-Decode types library                               |
 | `libsync/`          | Concurrency mechanisms and primitives                     |
 
@@ -41,6 +41,7 @@ li {list-style-type: none; }
 * [The `native` 1:1 threading runtime](native/index.html)
 * [The `num` arbitrary precision numerics library](num/index.html)
 * [The `rand` library for random numbers and distributions](rand/index.html)
+* [The `regex` library for regular expressions](regex/index.html)
 * [The `rustc` compiler](rustc/index.html)
 * [The `rustuv` M:N I/O library](rustuv/index.html)
 * [The `semver` version collation library](semver/index.html)
 
@@ -0,0 +1,109 @@
+#!/usr/bin/env python2
+
+# Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+# file at the top-level directory of this distribution and at
+# http://rust-lang.org/COPYRIGHT.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+from __future__ import absolute_import, division, print_function
+import argparse
+import datetime
+import os.path as path
+
+
+def print_tests(tests):
+    print('\n'.join([test_tostr(t) for t in tests]))
+
+
+def read_tests(f):
+    basename, _ = path.splitext(path.basename(f))
+    tests = []
+    for lineno, line in enumerate(open(f), 1):
+        fields = filter(None, map(str.strip, line.split('\t')))
+        if not (4 <= len(fields) <= 5) \
+           or 'E' not in fields[0] or fields[0][0] == '#':
+            continue
+
+        opts, pat, text, sgroups = fields[0:4]
+        groups = []  # groups as integer ranges
+        if sgroups == 'NOMATCH':
+            groups = [None]
+        elif ',' in sgroups:
+            noparen = map(lambda s: s.strip('()'), sgroups.split(')('))
+            for g in noparen:
+                s, e = map(str.strip, g.split(','))
+                if s == '?' and e == '?':
+                    groups.append(None)
+                else:
+                    groups.append((int(s), int(e)))
+        else:
+            # This skips tests that should result in an error.
+            # There aren't many, so I think we can just capture those
+            # manually. Possibly fix this in future.
+            continue
+
+        if pat == 'SAME':
+            pat = tests[-1][1]
+        if '$' in opts:
+            pat = pat.decode('string_escape')
+            text = text.decode('string_escape')
+        if 'i' in opts:
+            pat = '(?i)%s' % pat
+
+        name = '%s_%d' % (basename, lineno)
+        tests.append((name, pat, text, groups))
+    return tests
+
+
+def test_tostr(t):
+    lineno, pat, text, groups = t
+    options = map(group_tostr, groups)
+    return 'mat!(match_%s, r"%s", r"%s", %s)' \
+           % (lineno, pat, '' if text == "NULL" else text, ', '.join(options))
+
+
+def group_tostr(g):
+    if g is None:
+        return 'None'
+    else:
+        return 'Some((%d, %d))' % (g[0], g[1])
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='Generate match tests from an AT&T POSIX test file.')
+    aa = parser.add_argument
+    aa('files', nargs='+',
+       help='A list of dat AT&T POSIX test files. See src/libregexp/testdata')
+    args = parser.parse_args()
+
+    tests = []
+    for f in args.files:
+        tests += read_tests(f)
+
+    tpl = '''// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// ignore-tidy-linelength
+
+// DO NOT EDIT. Automatically generated by 'src/etc/regexp-match-tests'
+// on {date}.
+'''
+    print(tpl.format(date=str(datetime.datetime.now())))
+
+    for f in args.files:
+        print('// Tests from %s' % path.basename(f))
+        print_tests(read_tests(f))
+        print('')
@@ -0,0 +1,183 @@
+#!/usr/bin/env python2
+
+# Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+# file at the top-level directory of this distribution and at
+# http://rust-lang.org/COPYRIGHT.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+from __future__ import absolute_import, division, print_function
+import argparse
+from collections import defaultdict
+import csv
+import datetime
+import urllib2
+
+BASE_URL = 'http://www.unicode.org/Public/6.3.0/ucd/'
+DATA = 'UnicodeData.txt'
+SCRIPTS = 'Scripts.txt'
+
+# Mapping taken from Table 12 from:
+# http://www.unicode.org/reports/tr44/#General_Category_Values
+expanded_categories = {
+    'Lu': ['LC', 'L'], 'Ll': ['LC', 'L'], 'Lt': ['LC', 'L'],
+    'Lm': ['L'], 'Lo': ['L'],
+    'Mn': ['M'], 'Mc': ['M'], 'Me': ['M'],
+    'Nd': ['N'], 'Nl': ['N'], 'No': ['No'],
+    'Pc': ['P'], 'Pd': ['P'], 'Ps': ['P'], 'Pe': ['P'],
+    'Pi': ['P'], 'Pf': ['P'], 'Po': ['P'],
+    'Sm': ['S'], 'Sc': ['S'], 'Sk': ['S'], 'So': ['S'],
+    'Zs': ['Z'], 'Zl': ['Z'], 'Zp': ['Z'],
+    'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'],
+}
+
+
+def as_4byte_uni(n):
+    s = hex(n)[2:]
+    return '\\U%s%s' % ('0' * (8 - len(s)), s)
+
+
+def expand_cat(c):
+    return expanded_categories.get(c, []) + [c]
+
+
+def is_valid_unicode(n):
+    return 0 <= n <= 0xD7FF or 0xE000 <= n <= 0x10FFFF
+
+
+def read_cats(f):
+    assigned = defaultdict(list)
+    for row in csv.reader(f, delimiter=';'):
+        (hex, cats) = (int(row[0], 16), expand_cat(row[2]))
+        if not is_valid_unicode(hex):
+            continue
+        for cat in cats:
+            assigned[cat].append(hex)
+    return assigned
+
+
+def read_scripts(f):
+    assigned = defaultdict(list)
+    for line in f:
+        line = line.strip()
+        if not line or line.startswith('#'):
+            continue
+        hexes, name = map(str.strip, line.split(';'))[:2]
+        name = name[:name.index('#')].strip()
+        if '..' not in hexes:
+            hex = int(hexes, 16)
+            if is_valid_unicode(hex):
+                assigned[name].append(hex)
+        else:
+            hex1, hex2 = map(lambda s: int(s, 16), hexes.split('..'))
+            for hex in xrange(hex1, hex2 + 1):
+                if is_valid_unicode(hex):
+                    assigned[name].append(hex)
+    return assigned
+
+
+def group(letters):
+    letters = sorted(set(letters))
+    grouped = []
+    cur_start = letters.pop(0)
+    cur_end = cur_start
+    for letter in letters:
+        assert letter > cur_end, \
+            'cur_end: %s, letter: %s' % (hex(cur_end), hex(letter))
+
+        if letter == cur_end + 1:
+            cur_end = letter
+        else:
+            grouped.append((cur_start, cur_end))
+            cur_start, cur_end = letter, letter
+    grouped.append((cur_start, cur_end))
+    return grouped
+
+
+def ranges_to_rust(rs):
+    rs = ("('%s', '%s')" % (as_4byte_uni(s), as_4byte_uni(e)) for s, e in rs)
+    return ',\n    '.join(rs)
+
+
+def groups_to_rust(groups):
+    rust_groups = []
+    for group_name in sorted(groups):
+        rust_groups.append('("%s", &[\n    %s\n    ]),'
+                           % (group_name, ranges_to_rust(groups[group_name])))
+    return '\n'.join(rust_groups)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='Generate Unicode character class tables.')
+    aa = parser.add_argument
+    aa('--local', action='store_true',
+       help='When set, Scripts.txt and UnicodeData.txt will be read from '
+            'the CWD.')
+    aa('--base-url', type=str, default=BASE_URL,
+       help='The base URL to use for downloading Unicode data files.')
+    args = parser.parse_args()
+
+    if args.local:
+        cats = read_cats(open(DATA))
+        scripts = read_scripts(open(SCRIPTS))
+    else:
+        cats = read_cats(urllib2.urlopen(args.base_url + '/' + DATA))
+        scripts = read_scripts(urllib2.urlopen(args.base_url + '/' + SCRIPTS))
+
+    # Get Rust code for all Unicode general categories and scripts.
+    combined = dict(cats, **scripts)
+    unigroups = groups_to_rust({k: group(letters)
+                                for k, letters in combined.items()})
+
+    # Now get Perl character classes that are Unicode friendly.
+    perld = range(ord('0'), ord('9') + 1)
+    dgroups = ranges_to_rust(group(perld + cats['Nd'][:]))
+
+    perls = map(ord, ['\t', '\n', '\x0C', '\r', ' '])
+    sgroups = ranges_to_rust(group(perls + cats['Z'][:]))
+
+    low, up = (range(ord('a'), ord('z') + 1), range(ord('A'), ord('Z') + 1))
+    perlw = [ord('_')] + perld + low + up
+    wgroups = ranges_to_rust(group(perlw + cats['L'][:]))
+
+    tpl = '''// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// DO NOT EDIT. Automatically generated by 'src/etc/regexp-unicode-tables'
+// on {date}.
+
+use parse::{{Class, NamedClasses}};
+
+pub static UNICODE_CLASSES: NamedClasses = &[
+
+{groups}
+
+];
+
+pub static PERLD: Class = &[
+    {dgroups}
+];
+
+pub static PERLS: Class = &[
+    {sgroups}
+];
+
+pub static PERLW: Class = &[
+    {wgroups}
+];
+'''
+    now = datetime.datetime.now()
+    print(tpl.format(date=str(now), groups=unigroups,
+                     dgroups=dgroups, sgroups=sgroups, wgroups=wgroups))
@@ -0,0 +1,274 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// Enable this to squash warnings due to exporting pieces of the representation
+// for use with the regex! macro. See lib.rs for explanation.
+#![allow(visible_private_types)]
+
+use std::cmp;
+use std::iter;
+use parse;
+use parse::{
+    Flags, FLAG_EMPTY,
+    Nothing, Literal, Dot, Class, Begin, End, WordBoundary, Capture, Cat, Alt,
+    Rep,
+    ZeroOne, ZeroMore, OneMore,
+};
+
+// An index into a program's instruction list; used as a jump/split target.
+type InstIdx = uint;
+
+// The instructions that make up a compiled program (see `Program::insts`).
+#[deriving(Show, Clone)]
+pub enum Inst {
+    // When a Match instruction is executed, the current thread is successful.
+    Match,
+
+    // The OneChar instruction matches a literal character.
+    // The flags indicate whether to do a case insensitive match.
+    OneChar(char, Flags),
+
+    // The CharClass instruction tries to match one input character against
+    // the range of characters given.
+    // The flags indicate whether to do a case insensitive match and whether
+    // the character class is negated or not.
+    CharClass(Vec<(char, char)>, Flags),
+
+    // Matches any character except new lines.
+    // The flags indicate whether to include the '\n' character.
+    Any(Flags),
+
+    // Matches the beginning of the string, consumes no characters.
+    // The flags indicate whether it matches if the preceding character
+    // is a new line.
+    EmptyBegin(Flags),
+
+    // Matches the end of the string, consumes no characters.
+    // The flags indicate whether it matches if the following character
+    // is a new line.
+    EmptyEnd(Flags),
+
+    // Matches a word boundary (\w on one side and \W \A or \z on the other),
+    // and consumes no character.
+    // The flags indicate whether this matches a word boundary or something
+    // that isn't a word boundary.
+    EmptyWordBoundary(Flags),
+
+    // Saves the current position in the input string to the Nth save slot.
+    Save(uint),
+
+    // Jumps to the instruction at the index given.
+    Jump(InstIdx),
+
+    // Jumps to the instruction at the first index given. If that leads to
+    // a failing state, then the instruction at the second index given is
+    // tried.
+    Split(InstIdx, InstIdx),
+}
+
+/// Program represents a compiled regular expression. Once an expression is
+/// compiled, its representation is immutable and will never change.
+///
+/// NOTE(review): this comment previously said that all data is wrapped in
+/// "MaybeStatic" or "MaybeOwned" types so that a `Program` can be static
+/// data for the `regex!` macro. The fields declared here are a plain
+/// `Vec<Inst>` and an owned `~str`; confirm which representation is intended.
+#[deriving(Clone)]
+pub struct Program {
+    /// A sequence of instructions.
+    pub insts: Vec<Inst>,
+    /// If the regular expression requires a literal prefix in order to have a
+    /// match, that prefix is stored here. (It's used in the VM to implement
+    /// an optimization.)
+    pub prefix: ~str,
+}
+
+impl Program {
+    /// Compiles a Regex given its AST.
+    ///
+    /// Returns the program together with the capture names collected while
+    /// compiling: the entry at index `cap` is the name given to capture
+    /// `cap`, or `None`. The compiler grows that list in chunks, so it may
+    /// end with extra `None` entries.
+    pub fn new(ast: ~parse::Ast) -> (Program, ~[Option<~str>]) {
+        let mut c = Compiler {
+            insts: Vec::with_capacity(100),
+            names: Vec::with_capacity(10),
+        };
+
+        // The whole match is wrapped in save slots 0 and 1, then `Match`.
+        c.insts.push(Save(0));
+        c.compile(ast);
+        c.insts.push(Save(1));
+        c.insts.push(Match);
+
+        // Try to discover a literal string prefix.
+        // This is a bit hacky since we have to skip over the initial
+        // 'Save' instruction.
+        // Only leading `OneChar` instructions whose flags are `FLAG_EMPTY`
+        // are collected; the scan stops at the first other instruction.
+        let mut pre = StrBuf::with_capacity(5);
+        for i in iter::range(1, c.insts.len()) {
+            match *c.insts.get(i) {
+                OneChar(c, FLAG_EMPTY) => pre.push_char(c),
+                _ => break
+            }
+        }
+
+        let names = c.names.as_slice().into_owned();
+        let prog = Program {
+            insts: c.insts,
+            prefix: pre.into_owned(),
+        };
+        (prog, names)
+    }
+
+    /// Returns the total number of capture groups in the regular expression.
+    /// This includes the zeroth capture.
+    pub fn num_captures(&self) -> uint {
+        // `n` ends up as one more than the highest `Save` slot in use.
+        let mut n = 0;
+        for inst in self.insts.iter() {
+            match *inst {
+                Save(c) => n = cmp::max(n, c+1),
+                _ => {}
+            }
+        }
+        // There's exactly 2 Save slots for every capture.
+        n / 2
+    }
+}
+
+// Accumulates the output of compiling one AST.
+// NOTE(review): the `'r` lifetime is not used by any field shown here.
+struct Compiler<'r> {
+    // Instructions emitted so far, in program order.
+    insts: Vec<Inst>,
+    // Capture names indexed by capture number; `None` where no name was set.
+    names: Vec<Option<~str>>,
+}
+
+// The compiler implemented here is extremely simple. Most of the complexity
+// in this crate is in the parser or the VM.
+// The only tricky thing here is patching jump/split instructions to point to
+// the right instruction.
+impl<'r> Compiler<'r> {
+    // Appends the instructions for `ast` to `self.insts`, recursing into
+    // sub-expressions. Jump and split targets are emitted as placeholders
+    // and patched once the positions of the surrounding code are known.
+    fn compile(&mut self, ast: ~parse::Ast) {
+        match ast {
+            ~Nothing => {},
+            ~Literal(c, flags) => self.push(OneChar(c, flags)),
+            ~Dot(nl) => self.push(Any(nl)),
+            ~Class(ranges, flags) =>
+                self.push(CharClass(ranges, flags)),
+            ~Begin(flags) => self.push(EmptyBegin(flags)),
+            ~End(flags) => self.push(EmptyEnd(flags)),
+            ~WordBoundary(flags) => self.push(EmptyWordBoundary(flags)),
+            ~Capture(cap, name, x) => {
+                // Make sure `names` has a slot for this capture index.
+                let len = self.names.len();
+                if cap >= len {
+                    self.names.grow(10 + cap - len, &None)
+                }
+                *self.names.get_mut(cap) = name;
+
+                // Capture `cap` uses save slots 2*cap and 2*cap + 1.
+                self.push(Save(2 * cap));
+                self.compile(x);
+                self.push(Save(2 * cap + 1));
+            }
+            ~Cat(xs) => {
+                for x in xs.move_iter() {
+                    self.compile(x)
+                }
+            }
+            ~Alt(x, y) => {
+                let split = self.empty_split(); // push: split 0, 0
+                let j1 = self.insts.len();
+                self.compile(x);                // push: insts for x
+                let jmp = self.empty_jump();    // push: jmp 0
+                let j2 = self.insts.len();
+                self.compile(y);                // push: insts for y
+                let j3 = self.insts.len();
+
+                self.set_split(split, j1, j2);  // split 0, 0 -> split j1, j2
+                self.set_jump(jmp, j3);         // jmp 0      -> jmp j3
+            }
+            ~Rep(x, ZeroOne, g) => {
+                // Layout: split; j1: <x>; j2: ...
+                let split = self.empty_split();
+                let j1 = self.insts.len();
+                self.compile(x);
+                let j2 = self.insts.len();
+
+                // The first split target is tried first, so the greedy
+                // form tries `x` before skipping it.
+                if g.is_greedy() {
+                    self.set_split(split, j1, j2);
+                } else {
+                    self.set_split(split, j2, j1);
+                }
+            }
+            ~Rep(x, ZeroMore, g) => {
+                // Layout: j1: split; j2: <x>; jmp j1; j3: ...
+                let j1 = self.insts.len();
+                let split = self.empty_split();
+                let j2 = self.insts.len();
+                self.compile(x);
+                let jmp = self.empty_jump();
+                let j3 = self.insts.len();
+
+                // After each `x`, loop back to the split.
+                self.set_jump(jmp, j1);
+                if g.is_greedy() {
+                    self.set_split(split, j2, j3);
+                } else {
+                    self.set_split(split, j3, j2);
+                }
+            }
+            ~Rep(x, OneMore, g) => {
+                // Layout: j1: <x>; split; j2: ...
+                // `x` always runs once before the split is reached.
+                let j1 = self.insts.len();
+                self.compile(x);
+                let split = self.empty_split();
+                let j2 = self.insts.len();
+
+                if g.is_greedy() {
+                    self.set_split(split, j1, j2);
+                } else {
+                    self.set_split(split, j2, j1);
+                }
+            }
+        }
+    }
+
+    /// Appends the given instruction to the program.
+    #[inline]
+    fn push(&mut self, x: Inst) {
+        self.insts.push(x)
+    }
+
+    /// Appends an *empty* `Split` instruction to the program and returns
+    /// the index of that instruction. (The index can then be used to "patch"
+    /// the actual locations of the split in later.)
+    #[inline]
+    fn empty_split(&mut self) -> InstIdx {
+        self.insts.push(Split(0, 0));
+        self.insts.len() - 1
+    }
+
+    /// Sets the left and right locations of a `Split` instruction at index
+    /// `i` to `pc1` and `pc2`, respectively.
+    /// If the instruction at index `i` isn't a `Split` instruction, then
+    /// `fail!` is called.
+    #[inline]
+    fn set_split(&mut self, i: InstIdx, pc1: InstIdx, pc2: InstIdx) {
+        let split = self.insts.get_mut(i);
+        match *split {
+            Split(_, _) => *split = Split(pc1, pc2),
+            _ => fail!("BUG: Invalid split index."),
+        }
+    }
+
+    /// Appends an *empty* `Jump` instruction to the program and returns the
+    /// index of that instruction.
+    #[inline]
+    fn empty_jump(&mut self) -> InstIdx {
+        self.insts.push(Jump(0));
+        self.insts.len() - 1
+    }
+
+    /// Sets the location of a `Jump` instruction at index `i` to `pc`.
+    /// If the instruction at index `i` isn't a `Jump` instruction, then
+    /// `fail!` is called.
+    #[inline]
+    fn set_jump(&mut self, i: InstIdx, pc: InstIdx) {
+        let jmp = self.insts.get_mut(i);
+        match *jmp {
+            Jump(_) => *jmp = Jump(pc),
+            _ => fail!("BUG: Invalid jump index."),
+        }
+    }
+}
@@ -0,0 +1,179 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use rand::{Rng, task_rng};
+use stdtest::Bencher;
+use std::str;
+use regex::{Regex, NoExpand};
+
+// Benchmarks `re.is_match(text)` and fails if the text does not match.
+fn bench_assert_match(b: &mut Bencher, re: Regex, text: &str) {
+    b.iter(|| if !re.is_match(text) { fail!("no match") });
+}
+
+// Matches 100 copies of "a?" followed by 100 copies of "a" against 100 'a's.
+#[bench]
+fn no_exponential(b: &mut Bencher) {
+    let n = 100;
+    let re = Regex::new("a?".repeat(n) + "a".repeat(n)).unwrap();
+    let text = "a".repeat(n);
+    bench_assert_match(b, re, text);
+}
+
+// Finds the single-character literal "y" after 50 non-matching 'x's.
+#[bench]
+fn literal(b: &mut Bencher) {
+    let re = regex!("y");
+    let text = "x".repeat(50) + "y";
+    bench_assert_match(b, re, text);
+}
+
+// Same text as `literal`, but the pattern starts with '.', not a literal.
+#[bench]
+fn not_literal(b: &mut Bencher) {
+    let re = regex!(".y");
+    let text = "x".repeat(50) + "y";
+    bench_assert_match(b, re, text);
+}
+
+// Finds a character class member ('w') after 80 non-matching 'x's.
+#[bench]
+fn match_class(b: &mut Bencher) {
+    let re = regex!("[abcdw]");
+    let text = "xxxx".repeat(20) + "w";
+    bench_assert_match(b, re, text);
+}
+
+// Finds a class member ('c') after 80 'b's, which lie between the class's
+// two members.
+#[bench]
+fn match_class_in_range(b: &mut Bencher) {
+    // 'b' is between 'a' and 'c', so the class range checking doesn't help.
+    let re = regex!("[ac]");
+    let text = "bbbb".repeat(20) + "c";
+    bench_assert_match(b, re, text);
+}
+
+// Replaces every match of `[cjrw]` in the lowercase alphabet with "".
+#[bench]
+fn replace_all(b: &mut Bencher) {
+    let re = regex!("[cjrw]");
+    let text = "abcdefghijklmnopqrstuvwxyz";
+    // FIXME: This isn't using the $name expand stuff.
+    // It's possible RE2/Go is using it, but currently, the expand in this
+    // crate is actually compiling a regex, so it's incredibly slow.
+    b.iter(|| re.replace_all(text, NoExpand("")));
+}
+
+// Anchored pattern starting with 'z' against a 26-char text starting with 'a'.
+#[bench]
+fn anchored_literal_short_non_match(b: &mut Bencher) {
+    let re = regex!("^zbc(d|e)");
+    let text = "abcdefghijklmnopqrstuvwxyz";
+    b.iter(|| re.is_match(text));
+}
+
+// Same as the short non-match variant, with the text repeated 15 times.
+#[bench]
+fn anchored_literal_long_non_match(b: &mut Bencher) {
+    let re = regex!("^zbc(d|e)");
+    let text = "abcdefghijklmnopqrstuvwxyz".repeat(15);
+    b.iter(|| re.is_match(text));
+}
+
+// Anchored pattern starting with '.' against the 26-char alphabet.
+#[bench]
+fn anchored_literal_short_match(b: &mut Bencher) {
+    let re = regex!("^.bc(d|e)");
+    let text = "abcdefghijklmnopqrstuvwxyz";
+    b.iter(|| re.is_match(text));
+}
+
+// Same as the short match variant, with the text repeated 15 times.
+#[bench]
+fn anchored_literal_long_match(b: &mut Bencher) {
+    let re = regex!("^.bc(d|e)");
+    let text = "abcdefghijklmnopqrstuvwxyz".repeat(15);
+    b.iter(|| re.is_match(text));
+}
+
+// Pattern anchored at both ends, with a capturing `(d|e)*` group.
+#[bench]
+fn one_pass_short_a(b: &mut Bencher) {
+    let re = regex!("^.bc(d|e)*$");
+    let text = "abcddddddeeeededd";
+    b.iter(|| re.is_match(text));
+}
+
+// Like `one_pass_short_a`, but without the leading '^' anchor.
+#[bench]
+fn one_pass_short_a_not(b: &mut Bencher) {
+    let re = regex!(".bc(d|e)*$");
+    let text = "abcddddddeeeededd";
+    b.iter(|| re.is_match(text));
+}
+
+// Like `one_pass_short_a`, but with a non-capturing `(?:d|e)*` group.
+#[bench]
+fn one_pass_short_b(b: &mut Bencher) {
+    let re = regex!("^.bc(?:d|e)*$");
+    let text = "abcddddddeeeededd";
+    b.iter(|| re.is_match(text));
+}
+
+// Like `one_pass_short_b`, but without the leading '^' anchor.
+#[bench]
+fn one_pass_short_b_not(b: &mut Bencher) {
+    let re = regex!(".bc(?:d|e)*$");
+    let text = "abcddddddeeeededd";
+    b.iter(|| re.is_match(text));
+}
+
+// Anchored pattern that begins with the full 26-letter literal.
+#[bench]
+fn one_pass_long_prefix(b: &mut Bencher) {
+    let re = regex!("^abcdefghijklmnopqrstuvwxyz.*$");
+    let text = "abcdefghijklmnopqrstuvwxyz";
+    b.iter(|| re.is_match(text));
+}
+
+// Like `one_pass_long_prefix`, but the first pattern character is '.'.
+#[bench]
+fn one_pass_long_prefix_not(b: &mut Bencher) {
+    let re = regex!("^.bcdefghijklmnopqrstuvwxyz.*$");
+    let text = "abcdefghijklmnopqrstuvwxyz";
+    b.iter(|| re.is_match(text));
+}
+
+// Defines a `#[bench]` function `$name` that runs `$regex` over `$size`
+// bytes of generated text, reports `$size` as the bytes processed per
+// iteration, and fails if the regex matches.
+macro_rules! throughput(
+    ($name:ident, $regex:expr, $size:expr) => (
+        #[bench]
+        fn $name(b: &mut Bencher) {
+            let text = gen_text($size);
+            // Build the regex once, outside the timed closure, so that only
+            // matching is measured and not regex construction.
+            let re = $regex;
+            b.bytes = $size;
+            b.iter(|| if re.is_match(text) { fail!("match") });
+        }
+    );
+)
+
+// Regexes used by the `throughput!` benchmarks. All are anchored at the end
+// with '$' and contain uppercase literals.
+fn easy0() -> Regex { regex!("ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
+fn easy1() -> Regex { regex!("A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$") }
+fn medium() -> Regex { regex!("[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
+fn hard() -> Regex { regex!("[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
+
+// Generates `n` bytes of random ASCII text from the task RNG, with a '\n'
+// written at every index that is a multiple of 20 (0, 20, 40, ...).
+fn gen_text(n: uint) -> ~str {
+    let mut rng = task_rng();
+    let mut bytes = rng.gen_ascii_str(n).into_bytes();
+    for (i, b) in bytes.mut_iter().enumerate() {
+        if i % 20 == 0 {
+            *b = '\n' as u8
+        }
+    }
+    str::from_utf8(bytes).unwrap().to_owned()
+}
+
+// Each regex is benchmarked on 32 bytes, 1<<10 bytes and 32<<10 bytes.
+throughput!(easy0_32, easy0(), 32)
+throughput!(easy0_1K, easy0(), 1<<10)
+throughput!(easy0_32K, easy0(), 32<<10)
+
+throughput!(easy1_32, easy1(), 32)
+throughput!(easy1_1K, easy1(), 1<<10)
+throughput!(easy1_32K, easy1(), 32<<10)
+
+throughput!(medium_32, medium(), 32)
+throughput!(medium_1K, medium(), 1<<10)
+throughput!(medium_32K,medium(), 32<<10)
+
+throughput!(hard_32, hard(), 32)
+throughput!(hard_1K, hard(), 1<<10)
+throughput!(hard_32K,hard(), 32<<10)
+
@@ -0,0 +1,29 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// Outside of stage1, `regex!` comes from the `regex_macros` syntax extension.
+#[cfg(not(stage1))]
+#[phase(syntax)]
+extern crate regex_macros;
+
+// Dirty hack: During stage1, test dynamic regexes. For stage2, we test
+// native regexes.
+//
+// This stage1-only `regex!` compiles the pattern at run time with
+// `Regex::new` and fails with the error message if the pattern is invalid.
+#[cfg(stage1)]
+macro_rules! regex(
+    ($re:expr) => (
+        match ::regex::Regex::new($re) {
+            Ok(re) => re,
+            Err(err) => fail!("{}", err),
+        }
+    );
+)
+
+mod bench;
+mod tests;
+
@@ -0,0 +1,199 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// ignore-tidy-linelength
+
+use regex::{Regex, NoExpand};
+
+#[test]
+fn splitn() { // checks splitn with a limit of 2 on digit-run separators
+    let re = regex!(r"\d+");
+    let text = "cauchy123plato456tyler789binx";
+    let subs: Vec<&str> = re.splitn(text, 2).collect();
+    assert_eq!(subs, vec!("cauchy", "plato456tyler789binx")); // second piece stays unsplit
+}
+
+#[test]
+fn split() { // checks an unlimited split on digit-run separators
+    let re = regex!(r"\d+");
+    let text = "cauchy123plato456tyler789binx";
+    let subs: Vec<&str> = re.split(text).collect();
+    assert_eq!(subs, vec!("cauchy", "plato", "tyler", "binx")); // separators are dropped
+}
+
+macro_rules! replace( // generates a #[test] fn $name that checks one replacement result
+    ($name:ident, $which:ident, $re:expr,
+     $search:expr, $replace:expr, $result:expr) => (
+        #[test]
+        fn $name() {
+            let re = regex!($re);
+            assert_eq!(re.$which($search, $replace), StrBuf::from_str($result)); // $which names the method called on the regex
+        }
+    );
+)
+
+replace!(rep_first, replace, r"\d", "age: 26", "Z", "age: Z6") // only the first digit changes
+replace!(rep_plus, replace, r"\d+", "age: 26", "Z", "age: Z")
+replace!(rep_all, replace_all, r"\d", "age: 26", "Z", "age: ZZ") // every digit changes
+replace!(rep_groups, replace, r"(\S+)\s+(\S+)", "w1 w2", "$2 $1", "w2 w1") // $N inserts group N
+replace!(rep_double_dollar, replace,
+         r"(\S+)\s+(\S+)", "w1 w2", "$2 $$1", "w2 $1") // "$$" yields a literal '$'
+replace!(rep_no_expand, replace,
+         r"(\S+)\s+(\S+)", "w1 w2", NoExpand("$2 $1"), "$2 $1") // NoExpand: text used verbatim
+replace!(rep_named, replace_all,
+         r"(?P<first>\S+)\s+(?P<last>\S+)(?P<space>\s*)",
+         "w1 w2 w3 w4", "$last $first$space", "w2 w1 w4 w3") // $name inserts a named group
+replace!(rep_trim, replace_all, "^[ \t]+|[ \t]+$", " \t  trim me\t   \t",
+         "", "trim me") // leading and trailing blanks are both removed
+
+macro_rules! noparse( // generates a #[test] fn $name expecting Regex::new to reject $re
+    ($name:ident, $re:expr) => (
+        #[test]
+        fn $name() {
+            let re = $re;
+            match Regex::new(re) {
+                Err(_) => {}, // any parse error counts as a pass
+                Ok(_) => fail!("Regex '{}' should cause a parse error.", re),
+            }
+        }
+    );
+)
+
+noparse!(fail_double_repeat, "a**")
+noparse!(fail_no_repeat_arg, "*")
+noparse!(fail_no_repeat_arg_begin, "^*")
+noparse!(fail_incomplete_escape, "\\") // same pattern as fail_unfinished_escape
+noparse!(fail_class_incomplete, "[A-")
+noparse!(fail_class_not_closed, "[A")
+noparse!(fail_class_no_begin, r"[\A]")
+noparse!(fail_class_no_end, r"[\z]")
+noparse!(fail_class_no_boundary, r"[\b]")
+noparse!(fail_open_paren, "(")
+noparse!(fail_close_paren, ")")
+noparse!(fail_invalid_range, "[a-Z]")
+noparse!(fail_empty_capture_name, "(?P<>a)")
+noparse!(fail_empty_capture_exp, "(?P<name>)")
+noparse!(fail_bad_capture_name, "(?P<na-me>)")
+noparse!(fail_bad_flag, "(?a)a") // near-duplicate of fail_flag_bad, with a trailing literal
+noparse!(fail_empty_alt_before, "|a")
+noparse!(fail_empty_alt_after, "a|")
+noparse!(fail_counted_big_exact, "a{1001}") // presumably above the parser's repeat limit
+noparse!(fail_counted_big_min, "a{1001,}")
+noparse!(fail_counted_no_close, "a{1001")
+noparse!(fail_unfinished_cap, "(?")
+noparse!(fail_unfinished_escape, "\\") // same pattern as fail_incomplete_escape
+noparse!(fail_octal_digit, r"\8")
+noparse!(fail_hex_digit, r"\xG0")
+noparse!(fail_hex_short, r"\xF")
+noparse!(fail_hex_long_digits, r"\x{fffg}")
+noparse!(fail_flag_bad, "(?a)")
+noparse!(fail_flag_empty, "(?)")
+noparse!(fail_double_neg, "(?-i-i)")
+noparse!(fail_neg_empty, "(?i-)")
+noparse!(fail_empty_group, "()")
+noparse!(fail_dupe_named, "(?P<a>.)(?P<a>.)")
+
+macro_rules! mat( // generates a #[test] fn $name comparing capture positions against $loc
+    ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => (
+        #[test]
+        fn $name() {
+            let text = $text;
+            let expected: Vec<Option<(uint, uint)>> = vec!($($loc)+);
+            let r = regex!($re);
+            let got = match r.captures(text) {
+                Some(c) => c.iter_pos().collect::<Vec<Option<(uint, uint)>>>(),
+                None => vec!(None), // no match at all is compared as a lone `None`
+            };
+            // The test set sometimes leaves out capture groups, so truncate
+            // actual capture groups to match test set.
+            let (sexpect, mut sgot) = (expected.as_slice(), got.as_slice());
+            if sgot.len() > sexpect.len() {
+                sgot = sgot.slice(0, sexpect.len())
+            }
+            if sexpect != sgot {
+                fail!("For RE '{}' against '{}', expected '{}' but got '{}'",
+                      $re, text, sexpect, sgot);
+            }
+        }
+    );
+)
+
+// Some crazy expressions from regular-expressions.info.
+mat!(match_ranges,
+     r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
+     "num: 255", Some((5, 8))) // 255 is the largest value the alternation accepts
+mat!(match_ranges_not,
+     r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
+     "num: 256", None) // 256 fits none of the alternatives between the \b anchors
+mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3)))
+mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3)))
+mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4)))
+mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None)
+mat!(match_email, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
+     "mine is jam.slam@gmail.com ", Some((8, 26)))
+mat!(match_email_not, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
+     "mine is jam.slam@gmail ", None)
+mat!(match_email_big, r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
+     "mine is jam.slam@gmail.com ", Some((8, 26)))
+mat!(match_date1,
+     r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
+     "1900-01-01", Some((0, 10)))
+mat!(match_date2,
+     r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
+     "1900-00-01", None) // month 00 is rejected
+mat!(match_date3,
+     r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
+     "1900-13-01", None) // month 13 is rejected
+
+// Exercise the flags.
+mat!(match_flag_case, "(?i)abc", "ABC", Some((0, 3)))
+mat!(match_flag_weird_case, "(?i)a(?-i)bc", "Abc", Some((0, 3))) // (?-i) turns case folding back off
+mat!(match_flag_weird_case_not, "(?i)a(?-i)bc", "ABC", None)
+mat!(match_flag_case_dotnl, "(?is)a.", "A\n", Some((0, 2))) // with `s`, '.' also matches '\n'
+mat!(match_flag_case_dotnl_toggle, "(?is)a.(?-is)a.", "A\nab", Some((0, 4)))
+mat!(match_flag_case_dotnl_toggle_not, "(?is)a.(?-is)a.", "A\na\n", None)
+mat!(match_flag_case_dotnl_toggle_ok, "(?is)a.(?-is:a.)?", "A\na\n", Some((0, 2)))
+mat!(match_flag_multi, "(?m)(?:^\\d+$\n?)+", "123\n456\n789", Some((0, 11)))
+mat!(match_flag_ungreedy, "(?U)a+", "aa", Some((0, 1))) // under (?U), a+ is expected to stop after one 'a'
+mat!(match_flag_ungreedy_greedy, "(?U)a+?", "aa", Some((0, 2))) // under (?U), a+? is expected to take both
+mat!(match_flag_ungreedy_noop, "(?U)(?-U)a+", "aa", Some((0, 2)))
+
+// Some Unicode tests.
+mat!(uni_literal, r"Ⅰ", "Ⅰ", Some((0, 3))) // positions are byte offsets: this one character spans 3 bytes
+mat!(uni_one, r"\pN", "Ⅰ", Some((0, 3)))
+mat!(uni_mixed, r"\pN+", "Ⅰ1Ⅱ2", Some((0, 8)))
+mat!(uni_not, r"\PN+", "abⅠ", Some((0, 2)))
+mat!(uni_not_class, r"[\PN]+", "abⅠ", Some((0, 2)))
+mat!(uni_not_class_neg, r"[^\PN]+", "abⅠ", Some((2, 5)))
+mat!(uni_case, r"(?i)Δ", "δ", Some((0, 2)))
+mat!(uni_case_not, r"Δ", "δ", None)
+mat!(uni_case_upper, r"\p{Lu}+", "ΛΘΓΔα", Some((0, 8))) // stops before the final lowercase letter
+mat!(uni_case_upper_nocase_flag, r"(?i)\p{Lu}+", "ΛΘΓΔα", Some((0, 10)))
+mat!(uni_case_upper_nocase, r"\p{L}+", "ΛΘΓΔα", Some((0, 10)))
+mat!(uni_case_lower, r"\p{Ll}+", "ΛΘΓΔα", Some((8, 10)))
+
+// Test the Unicode friendliness of Perl character classes.
+mat!(uni_perl_w, r"\w+", "dδd", Some((0, 4))) // the non-ASCII letter is expected to count as \w
+mat!(uni_perl_w_not, r"\w+", "Ⅱ", None)
+mat!(uni_perl_w_neg, r"\W+", "Ⅱ", Some((0, 3)))
+mat!(uni_perl_d, r"\d+", "1२३9", Some((0, 8))) // the two non-ASCII digits are expected to count as \d
+mat!(uni_perl_d_not, r"\d+", "Ⅱ", None)
+mat!(uni_perl_d_neg, r"\D+", "Ⅱ", Some((0, 3)))
+mat!(uni_perl_s, r"\s+", " ", Some((0, 3)))
+mat!(uni_perl_s_not, r"\s+", "☃", None)
+mat!(uni_perl_s_neg, r"\S+", "☃", Some((0, 3)))
+
+// And do the same for word boundaries.
+mat!(uni_boundary_none, r"\d\b", "6δ", None)
+mat!(uni_boundary_ogham, r"\d\b", "6 ", Some((0, 1)))
+
+// A whole mess of tests from Glenn Fowler's regex test suite.
+// Generated by the 'src/etc/regex-match-tests.py' program.
+mod matches;
@@ -0,0 +1,19 @@
+The following license covers testregex.c and all associated test data.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software
+without restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, and/or sell copies of the
+Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following disclaimer:
+
+THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,17 @@
+Test data was taken from the Go distribution, which was in turn taken from the 
+testregex test suite:
+
+  http://www2.research.att.com/~astopen/testregex/testregex.html
+
+The LICENSE in this directory corresponds to the LICENSE that the data was
+released under.
+
+The tests themselves were modified for RE2/Go. A couple were modified further
+by me, Andrew Gallant (only in repetition.dat), so that RE2/Go would pass them.
+(Yes, it seems like RE2/Go includes failing test cases.) This may or may not
+have been a bad idea, but I think being consistent with an established regex
+library is worth something.
+
+Note that these files are read by 'src/etc/regex-match-tests.py' and turned into
+Rust tests found in 'src/libregex/test/matches.rs'.
+
@@ -0,0 +1,221 @@
+NOTE	all standard compliant implementations should pass these : 2002-05-31
+
+BE	abracadabra$	abracadabracadabra	(7,18)
+BE	a...b		abababbb		(2,7)
+BE	XXXXXX		..XXXXXX		(2,8)
+E	\)		()	(1,2)
+BE	a]		a]a	(0,2)
+B	}		}	(0,1)
+E	\}		}	(0,1)
+BE	\]		]	(0,1)
+B	]		]	(0,1)
+E	]		]	(0,1)
+B	{		{	(0,1)
+B	}		}	(0,1)
+BE	^a		ax	(0,1)
+BE	\^a		a^a	(1,3)
+BE	a\^		a^	(0,2)
+BE	a$		aa	(1,2)
+BE	a\$		a$	(0,2)
+BE	^$		NULL	(0,0)
+E	$^		NULL	(0,0)
+E	a($)		aa	(1,2)(2,2)
+E	a*(^a)		aa	(0,1)(0,1)
+E	(..)*(...)*		a	(0,0)
+E	(..)*(...)*		abcd	(0,4)(2,4)
+E	(ab|a)(bc|c)		abc	(0,3)(0,2)(2,3)
+E	(ab)c|abc		abc	(0,3)(0,2)
+E	a{0}b		ab			(1,2)
+E	(a*)(b?)(b+)b{3}	aaabbbbbbb	(0,10)(0,3)(3,4)(4,7)
+E	(a*)(b{0,1})(b{1,})b{3}	aaabbbbbbb	(0,10)(0,3)(3,4)(4,7)
+E	a{9876543210}	NULL	BADBR
+E	((a|a)|a)			a	(0,1)(0,1)(0,1)
+E	(a*)(a|aa)			aaaa	(0,4)(0,3)(3,4)
+E	a*(a.|aa)			aaaa	(0,4)(2,4)
+E	a(b)|c(d)|a(e)f			aef	(0,3)(?,?)(?,?)(1,2)
+E	(a|b)?.*			b	(0,1)(0,1)
+E	(a|b)c|a(b|c)			ac	(0,2)(0,1)
+E	(a|b)c|a(b|c)			ab	(0,2)(?,?)(1,2)
+E	(a|b)*c|(a|ab)*c		abc	(0,3)(1,2)
+E	(a|b)*c|(a|ab)*c		xc	(1,2)
+E	(.a|.b).*|.*(.a|.b)		xa	(0,2)(0,2)
+E	a?(ab|ba)ab			abab	(0,4)(0,2)
+E	a?(ac{0}b|ba)ab			abab	(0,4)(0,2)
+E	ab|abab				abbabab	(0,2)
+E	aba|bab|bba			baaabbbaba	(5,8)
+E	aba|bab				baaabbbaba	(6,9)
+E	(aa|aaa)*|(a|aaaaa)		aa	(0,2)(0,2)
+E	(a.|.a.)*|(a|.a...)		aa	(0,2)(0,2)
+E	ab|a				xabc	(1,3)
+E	ab|a				xxabc	(2,4)
+Ei	(Ab|cD)*			aBcD	(0,4)(2,4)
+BE	[^-]			--a		(2,3)
+BE	[a-]*			--a		(0,3)
+BE	[a-m-]*			--amoma--	(0,4)
+E	:::1:::0:|:::1:1:0:	:::0:::1:::1:::0:	(8,17)
+E	:::1:::0:|:::1:1:1:	:::0:::1:::1:::0:	(8,17)
+{E	[[:upper:]]		A		(0,1)	[[<element>]] not supported
+E	[[:lower:]]+		`az{		(1,3)
+E	[[:upper:]]+		@AZ[		(1,3)
+# No collation in Go
+#BE	[[-]]			[[-]]		(2,4)
+#BE	[[.NIL.]]	NULL	ECOLLATE
+#BE	[[=aleph=]]	NULL	ECOLLATE
+}
+BE$	\n		\n	(0,1)
+BEn$	\n		\n	(0,1)
+BE$	[^a]		\n	(0,1)
+BE$	\na		\na	(0,2)
+E	(a)(b)(c)	abc	(0,3)(0,1)(1,2)(2,3)
+BE	xxx		xxx	(0,3)
+E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	feb 6,	(0,6)
+E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	2/7	(0,3)
+E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	feb 1,Feb 6	(5,11)
+E3	((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))	x	(0,1)(0,1)(0,1)
+E3	((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*	xx	(0,2)(1,2)(1,2)
+E	a?(ab|ba)*	ababababababababababababababababababababababababababababababababababababababababa	(0,81)(79,81)
+E	abaa|abbaa|abbbaa|abbbbaa	ababbabbbabbbabbbbabbbbaa	(18,25)
+E	abaa|abbaa|abbbaa|abbbbaa	ababbabbbabbbabbbbabaa	(18,22)
+E	aaac|aabc|abac|abbc|baac|babc|bbac|bbbc	baaabbbabac	(7,11)
+BE$	.*			\x01\x7f	(0,2)
+E	aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll		XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa	(53,57)
+L	aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll		XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa	NOMATCH
+E	a*a*a*a*a*b		aaaaaaaaab	(0,10)
+BE	^			NULL		(0,0)
+BE	$			NULL		(0,0)
+BE	^$			NULL		(0,0)
+BE	^a$			a		(0,1)
+BE	abc			abc		(0,3)
+BE	abc			xabcy		(1,4)
+BE	abc			ababc		(2,5)
+BE	ab*c			abc		(0,3)
+BE	ab*bc			abc		(0,3)
+BE	ab*bc			abbc		(0,4)
+BE	ab*bc			abbbbc		(0,6)
+E	ab+bc			abbc		(0,4)
+E	ab+bc			abbbbc		(0,6)
+E	ab?bc			abbc		(0,4)
+E	ab?bc			abc		(0,3)
+E	ab?c			abc		(0,3)
+BE	^abc$			abc		(0,3)
+BE	^abc			abcc		(0,3)
+BE	abc$			aabc		(1,4)
+BE	^			abc		(0,0)
+BE	$			abc		(3,3)
+BE	a.c			abc		(0,3)
+BE	a.c			axc		(0,3)
+BE	a.*c			axyzc		(0,5)
+BE	a[bc]d			abd		(0,3)
+BE	a[b-d]e			ace		(0,3)
+BE	a[b-d]			aac		(1,3)
+BE	a[-b]			a-		(0,2)
+BE	a[b-]			a-		(0,2)
+BE	a]			a]		(0,2)
+BE	a[]]b			a]b		(0,3)
+BE	a[^bc]d			aed		(0,3)
+BE	a[^-b]c			adc		(0,3)
+BE	a[^]b]c			adc		(0,3)
+E	ab|cd			abc		(0,2)
+E	ab|cd			abcd		(0,2)
+E	a\(b			a(b		(0,3)
+E	a\(*b			ab		(0,2)
+E	a\(*b			a((b		(0,4)
+E	((a))			abc		(0,1)(0,1)(0,1)
+E	(a)b(c)			abc		(0,3)(0,1)(2,3)
+E	a+b+c			aabbabc		(4,7)
+E	a*			aaa		(0,3)
+#E	(a*)*			-		(0,0)(0,0)
+E	(a*)*			-		(0,0)(?,?)	RE2/Go
+E	(a*)+			-		(0,0)(0,0)
+#E	(a*|b)*			-		(0,0)(0,0)
+E	(a*|b)*			-		(0,0)(?,?)	RE2/Go
+E	(a+|b)*			ab		(0,2)(1,2)
+E	(a+|b)+			ab		(0,2)(1,2)
+E	(a+|b)?			ab		(0,1)(0,1)
+BE	[^ab]*			cde		(0,3)
+#E	(^)*			-		(0,0)(0,0)
+E	(^)*			-		(0,0)(?,?)	RE2/Go
+BE	a*			NULL		(0,0)
+E	([abc])*d		abbbcd		(0,6)(4,5)
+E	([abc])*bcd		abcd		(0,4)(0,1)
+E	a|b|c|d|e		e		(0,1)
+E	(a|b|c|d|e)f		ef		(0,2)(0,1)
+#E	((a*|b))*		-		(0,0)(0,0)(0,0)
+E	((a*|b))*		-		(0,0)(?,?)(?,?)	RE2/Go
+BE	abcd*efg		abcdefg		(0,7)
+BE	ab*			xabyabbbz	(1,3)
+BE	ab*			xayabbbz	(1,2)
+E	(ab|cd)e		abcde		(2,5)(2,4)
+BE	[abhgefdc]ij		hij		(0,3)
+E	(a|b)c*d		abcd		(1,4)(1,2)
+E	(ab|ab*)bc		abc		(0,3)(0,1)
+E	a([bc]*)c*		abc		(0,3)(1,3)
+E	a([bc]*)(c*d)		abcd		(0,4)(1,3)(3,4)
+E	a([bc]+)(c*d)		abcd		(0,4)(1,3)(3,4)
+E	a([bc]*)(c+d)		abcd		(0,4)(1,2)(2,4)
+E	a[bcd]*dcdcde		adcdcde		(0,7)
+E	(ab|a)b*c		abc		(0,3)(0,2)
+E	((a)(b)c)(d)		abcd		(0,4)(0,3)(0,1)(1,2)(3,4)
+BE	[A-Za-z_][A-Za-z0-9_]*	alpha		(0,5)
+E	^a(bc+|b[eh])g|.h$	abh		(1,3)
+E	(bc+d$|ef*g.|h?i(j|k))	effgz		(0,5)(0,5)
+E	(bc+d$|ef*g.|h?i(j|k))	ij		(0,2)(0,2)(1,2)
+E	(bc+d$|ef*g.|h?i(j|k))	reffgz		(1,6)(1,6)
+E	(((((((((a)))))))))	a		(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
+BE	multiple words		multiple words yeah	(0,14)
+E	(.*)c(.*)		abcde		(0,5)(0,2)(3,5)
+BE	abcd			abcd		(0,4)
+E	a(bc)d			abcd		(0,4)(1,3)
+E	a[-]?c		ac		(0,3)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Qaddafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mo'ammar Gadhafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Kaddafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Qadhafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Gadafi	(0,14)(?,?)(10,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mu'ammar Qadafi	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moamar Gaddafi	(0,14)(?,?)(9,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mu'ammar Qadhdhafi	(0,18)(?,?)(13,15)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Khaddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghaddafy	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghadafi	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghaddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muamar Kaddafi	(0,14)(?,?)(9,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Quathafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Gheddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moammar Khadafy	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moammar Qudhafi	(0,15)(?,?)(10,12)
+E	a+(b|c)*d+		aabcdd			(0,6)(3,4)
+E	^.+$			vivi			(0,4)
+E	^(.+)$			vivi			(0,4)(0,4)
+E	^([^!.]+).att.com!(.+)$	gryphon.att.com!eby	(0,19)(0,7)(16,19)
+E	^([^!]+!)?([^!]+)$	bas			(0,3)(?,?)(0,3)
+E	^([^!]+!)?([^!]+)$	bar!bas			(0,7)(0,4)(4,7)
+E	^([^!]+!)?([^!]+)$	foo!bas			(0,7)(0,4)(4,7)
+E	^.+!([^!]+!)([^!]+)$	foo!bar!bas		(0,11)(4,8)(8,11)
+E	((foo)|(bar))!bas	bar!bas			(0,7)(0,3)(?,?)(0,3)
+E	((foo)|(bar))!bas	foo!bar!bas		(4,11)(4,7)(?,?)(4,7)
+E	((foo)|(bar))!bas	foo!bas			(0,7)(0,3)(0,3)
+E	((foo)|bar)!bas		bar!bas			(0,7)(0,3)
+E	((foo)|bar)!bas		foo!bar!bas		(4,11)(4,7)
+E	((foo)|bar)!bas		foo!bas			(0,7)(0,3)(0,3)
+E	(foo|(bar))!bas		bar!bas			(0,7)(0,3)(0,3)
+E	(foo|(bar))!bas		foo!bar!bas		(4,11)(4,7)(4,7)
+E	(foo|(bar))!bas		foo!bas			(0,7)(0,3)
+E	(foo|bar)!bas		bar!bas			(0,7)(0,3)
+E	(foo|bar)!bas		foo!bar!bas		(4,11)(4,7)
+E	(foo|bar)!bas		foo!bas			(0,7)(0,3)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bar!bas	(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	bas		(0,3)(?,?)(0,3)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	bar!bas		(0,7)(0,4)(4,7)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	foo!bar!bas	(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	foo!bas		(0,7)(0,4)(4,7)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	bas		(0,3)(0,3)(?,?)(0,3)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	bar!bas		(0,7)(0,7)(0,4)(4,7)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bar!bas	(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bas		(0,7)(0,7)(0,4)(4,7)
+E	.*(/XXX).*			/XXX			(0,4)(0,4)
+E	.*(\\XXX).*			\XXX			(0,4)(0,4)
+E	\\XXX				\XXX			(0,4)
+E	.*(/000).*			/000			(0,4)(0,4)
+E	.*(\\000).*			\000			(0,4)(0,4)
+E	\\000				\000			(0,4)
@@ -0,0 +1,79 @@
+NOTE	null subexpression matches : 2002-06-06
+
+E	(a*)*		a		(0,1)(0,1)
+#E	SAME		x		(0,0)(0,0)
+E	SAME		x		(0,0)(?,?)	RE2/Go
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a*)+		a		(0,1)(0,1)
+E	SAME		x		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a+)*		a		(0,1)(0,1)
+E	SAME		x		(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a+)+		a		(0,1)(0,1)
+E	SAME		x		NOMATCH
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+
+E	([a]*)*		a		(0,1)(0,1)
+#E	SAME		x		(0,0)(0,0)
+E	SAME		x		(0,0)(?,?)	RE2/Go
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	([a]*)+		a		(0,1)(0,1)
+E	SAME		x		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	([^b]*)*	a		(0,1)(0,1)
+#E	SAME		b		(0,0)(0,0)
+E	SAME		b		(0,0)(?,?)	RE2/Go
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaab		(0,6)(0,6)
+E	([ab]*)*	a		(0,1)(0,1)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		ababab		(0,6)(0,6)
+E	SAME		bababa		(0,6)(0,6)
+E	SAME		b		(0,1)(0,1)
+E	SAME		bbbbbb		(0,6)(0,6)
+E	SAME		aaaabcde	(0,5)(0,5)
+E	([^a]*)*	b		(0,1)(0,1)
+E	SAME		bbbbbb		(0,6)(0,6)
+#E	SAME		aaaaaa		(0,0)(0,0)
+E	SAME		aaaaaa		(0,0)(?,?)	RE2/Go
+E	([^ab]*)*	ccccxx		(0,6)(0,6)
+#E	SAME		ababab		(0,0)(0,0)
+E	SAME		ababab		(0,0)(?,?)	RE2/Go
+
+E	((z)+|a)*	zabcde		(0,2)(1,2)
+
+#{E	a+?		aaaaaa		(0,1)	no *? +? mimimal match ops
+#E	(a)		aaa		(0,1)(0,1)
+#E	(a*?)		aaa		(0,0)(0,0)
+#E	(a)*?		aaa		(0,0)
+#E	(a*?)*?		aaa		(0,0)
+#}
+
+B	\(a*\)*\(x\)		x	(0,1)(0,0)(0,1)
+B	\(a*\)*\(x\)		ax	(0,2)(0,1)(1,2)
+B	\(a*\)*\(x\)		axa	(0,2)(0,1)(1,2)
+B	\(a*\)*\(x\)\(\1\)	x	(0,1)(0,0)(0,1)(1,1)
+B	\(a*\)*\(x\)\(\1\)	ax	(0,2)(1,1)(1,2)(2,2)
+B	\(a*\)*\(x\)\(\1\)	axa	(0,3)(0,1)(1,2)(2,3)
+B	\(a*\)*\(x\)\(\1\)\(x\)	axax	(0,4)(0,1)(1,2)(2,3)(3,4)
+B	\(a*\)*\(x\)\(\1\)\(x\)	axxa	(0,3)(1,1)(1,2)(2,2)(2,3)
+
+#E	(a*)*(x)		x	(0,1)(0,0)(0,1)
+E	(a*)*(x)		x	(0,1)(?,?)(0,1)	RE2/Go
+E	(a*)*(x)		ax	(0,2)(0,1)(1,2)
+E	(a*)*(x)		axa	(0,2)(0,1)(1,2)
+
+E	(a*)+(x)		x	(0,1)(0,0)(0,1)
+E	(a*)+(x)		ax	(0,2)(0,1)(1,2)
+E	(a*)+(x)		axa	(0,2)(0,1)(1,2)
+
+E	(a*){2}(x)		x	(0,1)(0,0)(0,1)
+E	(a*){2}(x)		ax	(0,2)(1,1)(1,2)
+E	(a*){2}(x)		axa	(0,2)(1,1)(1,2)
@@ -0,0 +1,163 @@
+NOTE	implicit vs. explicit repetitions : 2009-02-02
+
+# Glenn Fowler <gsf@research.att.com>
+# conforming matches (column 4) must match one of the following BREs
+#	NOMATCH
+#	(0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
+#	(0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
+# i.e., each 3-tuple has two identical elements and one (?,?)
+
+E	((..)|(.))				NULL		NOMATCH
+E	((..)|(.))((..)|(.))			NULL		NOMATCH
+E	((..)|(.))((..)|(.))((..)|(.))		NULL		NOMATCH
+
+E	((..)|(.)){1}				NULL		NOMATCH
+E	((..)|(.)){2}				NULL		NOMATCH
+E	((..)|(.)){3}				NULL		NOMATCH
+
+E	((..)|(.))*				NULL		(0,0)
+
+E	((..)|(.))				a		(0,1)(0,1)(?,?)(0,1)
+E	((..)|(.))((..)|(.))			a		NOMATCH
+E	((..)|(.))((..)|(.))((..)|(.))		a		NOMATCH
+
+E	((..)|(.)){1}				a		(0,1)(0,1)(?,?)(0,1)
+E	((..)|(.)){2}				a		NOMATCH
+E	((..)|(.)){3}				a		NOMATCH
+
+E	((..)|(.))*				a		(0,1)(0,1)(?,?)(0,1)
+
+E	((..)|(.))				aa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aa		(0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
+E	((..)|(.))((..)|(.))((..)|(.))		aa		NOMATCH
+
+E	((..)|(.)){1}				aa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aa		(0,2)(1,2)(?,?)(1,2)
+E	((..)|(.)){3}				aa		NOMATCH
+
+E	((..)|(.))*				aa		(0,2)(0,2)(0,2)(?,?)
+
+E	((..)|(.))				aaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaa		(0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
+E	((..)|(.))((..)|(.))((..)|(.))		aaa		(0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
+
+E	((..)|(.)){1}				aaa		(0,2)(0,2)(0,2)(?,?)
+#E	((..)|(.)){2}				aaa		(0,3)(2,3)(?,?)(2,3)
+E	((..)|(.)){2}				aaa		(0,3)(2,3)(0,2)(2,3)	RE2/Go
+E	((..)|(.)){3}				aaa		(0,3)(2,3)(?,?)(2,3)
+
+#E	((..)|(.))*				aaa		(0,3)(2,3)(?,?)(2,3)
+E	((..)|(.))*				aaa		(0,3)(2,3)(0,2)(2,3)	RE2/Go
+
+E	((..)|(.))				aaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaa		(0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
+
+E	((..)|(.)){1}				aaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaa		(0,4)(2,4)(2,4)(?,?)
+#E	((..)|(.)){3}				aaaa		(0,4)(3,4)(?,?)(3,4)
+E	((..)|(.)){3}				aaaa		(0,4)(3,4)(0,2)(3,4)	RE2/Go
+
+E	((..)|(.))*				aaaa		(0,4)(2,4)(2,4)(?,?)
+
+E	((..)|(.))				aaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaaa		(0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
+
+E	((..)|(.)){1}				aaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaaa		(0,4)(2,4)(2,4)(?,?)
+#E	((..)|(.)){3}				aaaaa		(0,5)(4,5)(?,?)(4,5)
+E	((..)|(.)){3}				aaaaa		(0,5)(4,5)(2,4)(4,5)	RE2/Go
+
+#E	((..)|(.))*				aaaaa		(0,5)(4,5)(?,?)(4,5)
+E	((..)|(.))*				aaaaa		(0,5)(4,5)(2,4)(4,5)	RE2/Go
+
+E	((..)|(.))				aaaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaaaa		(0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
+
+E	((..)|(.)){1}				aaaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaaaa		(0,4)(2,4)(2,4)(?,?)
+E	((..)|(.)){3}				aaaaaa		(0,6)(4,6)(4,6)(?,?)
+
+E	((..)|(.))*				aaaaaa		(0,6)(4,6)(4,6)(?,?)
+
+NOTE	additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02
+
+# These test a bug in OS X / FreeBSD / NetBSD, and libtree. 
+# Linux/GLIBC gets the {8,} and {8,8} wrong.
+
+:HA#100:E	X(.?){0,}Y	X1234567Y	(0,9)(7,8)
+:HA#101:E	X(.?){1,}Y	X1234567Y	(0,9)(7,8)
+:HA#102:E	X(.?){2,}Y	X1234567Y	(0,9)(7,8)
+:HA#103:E	X(.?){3,}Y	X1234567Y	(0,9)(7,8)
+:HA#104:E	X(.?){4,}Y	X1234567Y	(0,9)(7,8)
+:HA#105:E	X(.?){5,}Y	X1234567Y	(0,9)(7,8)
+:HA#106:E	X(.?){6,}Y	X1234567Y	(0,9)(7,8)
+:HA#107:E	X(.?){7,}Y	X1234567Y	(0,9)(7,8)
+:HA#108:E	X(.?){8,}Y	X1234567Y	(0,9)(8,8)
+#:HA#110:E	X(.?){0,8}Y	X1234567Y	(0,9)(7,8)
+:HA#110:E	X(.?){0,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#111:E	X(.?){1,8}Y	X1234567Y	(0,9)(7,8)
+:HA#111:E	X(.?){1,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#112:E	X(.?){2,8}Y	X1234567Y	(0,9)(7,8)
+:HA#112:E	X(.?){2,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#113:E	X(.?){3,8}Y	X1234567Y	(0,9)(7,8)
+:HA#113:E	X(.?){3,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#114:E	X(.?){4,8}Y	X1234567Y	(0,9)(7,8)
+:HA#114:E	X(.?){4,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#115:E	X(.?){5,8}Y	X1234567Y	(0,9)(7,8)
+:HA#115:E	X(.?){5,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#116:E	X(.?){6,8}Y	X1234567Y	(0,9)(7,8)
+:HA#116:E	X(.?){6,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#117:E	X(.?){7,8}Y	X1234567Y	(0,9)(7,8)
+:HA#117:E	X(.?){7,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+:HA#118:E	X(.?){8,8}Y	X1234567Y	(0,9)(8,8)
+
+# These test a fixed bug in my regex-tdfa that did not keep the expanded
+# form properly grouped, so right association did the wrong thing with
+# these ambiguous patterns (crafted just to test my code when I became
+# suspicious of my implementation).  The first subexpression should use
+# "ab" then "a" then "bcd".
+
+# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
+# results like (0,6)(4,5)(6,6).
+
+:HA#260:E	(a|ab|c|bcd){0,}(d*)	ababcd	(0,1)(0,1)(1,1)
+:HA#261:E	(a|ab|c|bcd){1,}(d*)	ababcd	(0,1)(0,1)(1,1)
+:HA#262:E	(a|ab|c|bcd){2,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#263:E	(a|ab|c|bcd){3,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#264:E	(a|ab|c|bcd){4,}(d*)	ababcd	NOMATCH
+:HA#265:E	(a|ab|c|bcd){0,10}(d*)	ababcd	(0,1)(0,1)(1,1)
+:HA#266:E	(a|ab|c|bcd){1,10}(d*)	ababcd	(0,1)(0,1)(1,1)
+:HA#267:E	(a|ab|c|bcd){2,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#268:E	(a|ab|c|bcd){3,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#269:E	(a|ab|c|bcd){4,10}(d*)	ababcd	NOMATCH
+:HA#270:E	(a|ab|c|bcd)*(d*)	ababcd	(0,1)(0,1)(1,1)
+:HA#271:E	(a|ab|c|bcd)+(d*)	ababcd	(0,1)(0,1)(1,1)
+
+# The above worked on Linux/GLIBC but the following often fail.
+# They also trip up OS X / FreeBSD / NetBSD:
+
+#:HA#280:E	(ab|a|c|bcd){0,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#280:E	(ab|a|c|bcd){0,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#281:E	(ab|a|c|bcd){1,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#281:E	(ab|a|c|bcd){1,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#282:E	(ab|a|c|bcd){2,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#282:E	(ab|a|c|bcd){2,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#283:E	(ab|a|c|bcd){3,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#283:E	(ab|a|c|bcd){3,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+:HA#284:E	(ab|a|c|bcd){4,}(d*)	ababcd	NOMATCH
+#:HA#285:E	(ab|a|c|bcd){0,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#285:E	(ab|a|c|bcd){0,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#286:E	(ab|a|c|bcd){1,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#286:E	(ab|a|c|bcd){1,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#287:E	(ab|a|c|bcd){2,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#287:E	(ab|a|c|bcd){2,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#288:E	(ab|a|c|bcd){3,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#288:E	(ab|a|c|bcd){3,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+:HA#289:E	(ab|a|c|bcd){4,10}(d*)	ababcd	NOMATCH
+#:HA#290:E	(ab|a|c|bcd)*(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#290:E	(ab|a|c|bcd)*(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#291:E	(ab|a|c|bcd)+(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#291:E	(ab|a|c|bcd)+(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
@@ -0,0 +1,96 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// FIXME(#13725) windows needs fixing.
+// ignore-win32
+// ignore-stage1
+// ignore-cross-compile #12102
+
+#![feature(macro_rules, phase)]
+
+extern crate regex;
+#[phase(syntax)]extern crate regex_macros;
+extern crate sync;
+
+use std::io;
+use regex::{NoExpand, Regex};
+use sync::Arc;
+
+fn count_matches(seq: &str, variant: &Regex) -> int { // number of matches of variant in seq
+    let mut n = 0;
+    for _ in variant.find_iter(seq) { // one iteration per item yielded by find_iter
+        n += 1;
+    }
+    n
+}
+
+fn main() {
+    let mut rdr = if std::os::getenv("RUST_BENCH").is_some() { // RUST_BENCH set: use data file
+        let fd = io::File::open(&Path::new("shootout-k-nucleotide.data"));
+        ~io::BufferedReader::new(fd) as ~io::Reader
+    } else { // otherwise read from stdin
+        ~io::stdin() as ~io::Reader
+    };
+    let mut seq = StrBuf::from_str(rdr.read_to_str().unwrap());
+    let ilen = seq.len(); // length of the raw input
+
+    seq = regex!(">[^\n]*\n|\n").replace_all(seq.as_slice(), NoExpand(""));
+    let seq_arc = Arc::new(seq.clone()); // copy before it moves
+    let clen = seq.len(); // length once ">..." lines and newlines are removed
+
+    let mut seqlen = sync::Future::spawn(proc() { // future yielding the substituted length
+        let substs = ~[ // (single-letter code, alternation that replaces it)
+            (regex!("B"), "(c|g|t)"),
+            (regex!("D"), "(a|g|t)"),
+            (regex!("H"), "(a|c|t)"),
+            (regex!("K"), "(g|t)"),
+            (regex!("M"), "(a|c)"),
+            (regex!("N"), "(a|c|g|t)"),
+            (regex!("R"), "(a|g)"),
+            (regex!("S"), "(c|g)"),
+            (regex!("V"), "(a|c|g)"),
+            (regex!("W"), "(a|t)"),
+            (regex!("Y"), "(c|t)"),
+        ];
+        let mut seq = seq; // the proc takes over `seq`; seq_arc holds the earlier copy
+        for (re, replacement) in substs.move_iter() {
+            seq = re.replace_all(seq.as_slice(), NoExpand(replacement)); // applied in list order
+        }
+        seq.len()
+    });
+
+    let variants = ~[ // patterns whose matches are counted
+        regex!("agggtaaa|tttaccct"),
+        regex!("[cgt]gggtaaa|tttaccc[acg]"),
+        regex!("a[act]ggtaaa|tttacc[agt]t"),
+        regex!("ag[act]gtaaa|tttac[agt]ct"),
+        regex!("agg[act]taaa|ttta[agt]cct"),
+        regex!("aggg[acg]aaa|ttt[cgt]ccct"),
+        regex!("agggt[cgt]aa|tt[acg]accct"),
+        regex!("agggta[cgt]a|t[acg]taccct"),
+        regex!("agggtaa[cgt]|[acg]ttaccct"),
+    ];
+    let (mut variant_strs, mut counts) = (vec!(), vec!());
+    for variant in variants.move_iter() {
+        let seq_arc_copy = seq_arc.clone();
+        variant_strs.push(variant.to_str().to_owned()); // pattern text, kept for the report
+        counts.push(sync::Future::spawn(proc() { // one future per variant
+            count_matches(seq_arc_copy.as_slice(), &variant)
+        }));
+    }
+
+    for (i, variant) in variant_strs.iter().enumerate() {
+        println!("{} {}", variant, counts.get_mut(i).get()); // "<pattern> <count>"
+    }
+    println!("");
+    println!("{}", ilen);
+    println!("{}", clen);
+    println!("{}", seqlen.get()); // value produced by the substitution future
+}
@@ -0,0 +1,28 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// FIXME(#13725) windows needs fixing.
+// ignore-win32
+// ignore-stage1
+
+#![feature(phase)]
+
+extern crate regex;
+#[phase(syntax)] extern crate regex_macros;
+
+// Tests to make sure that `regex!` will produce a compile error when given
+// an invalid regular expression.
+// More exhaustive failure tests for the parser are done with the traditional
+// unit testing infrastructure, since both dynamic and native regexes use the
+// same parser.
+
+fn main() { // must not compile: the pattern below is a lone, unclosed '('
+    let _ = regex!("("); //~ ERROR Regex syntax error
+}