Skip to content
This repository was archived by the owner on Nov 21, 2018. It is now read-only.

Commit f9c888a

Browse files
michaelwoerister authored and nikomatsakis committed
introduce patches for regex
1 parent d8034c8 commit f9c888a

File tree

177 files changed

+201524
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

177 files changed

+201524
-0
lines changed

regex-0.1.80/.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
target
2+
Cargo.lock
3+
bench-log
4+
.*.swp
5+
wiki
6+
tags

regex-0.1.80/.travis.yml

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
language: rust
2+
rust:
3+
- 1.3.0
4+
- stable
5+
- beta
6+
- nightly
7+
sudo: false
8+
script:
9+
- cargo build --verbose
10+
- if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
11+
cargo build --verbose --manifest-path=regex-debug/Cargo.toml;
12+
RUSTFLAGS="-C target-feature=+ssse3" cargo test --verbose --features 'simd-accel pattern';
13+
else
14+
travis_wait cargo test --verbose;
15+
fi
16+
- ./run-shootout-test
17+
- cargo doc --verbose
18+
- cargo test --verbose --manifest-path=regex-syntax/Cargo.toml
19+
- cargo doc --verbose --manifest-path=regex-syntax/Cargo.toml
20+
- if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
21+
(cd regex-capi && cargo build --verbose);
22+
(cd regex-capi/ctest && ./compile && LD_LIBRARY_PATH=../target/debug ./test);
23+
(cd regex-capi/examples && ./compile && LD_LIBRARY_PATH=../target/release ./iter);
24+
(cd bench && travis_wait ./run rust);
25+
(cd bench && travis_wait ./run rust-bytes --no-run);
26+
(cd bench && travis_wait ./run pcre1 --no-run);
27+
(cd bench && travis_wait ./run onig --no-run);
28+
travis_wait cargo test --verbose --manifest-path=regex_macros/Cargo.toml;
29+
fi
30+
addons:
31+
apt:
32+
packages:
33+
- libcurl4-openssl-dev
34+
- libelf-dev
35+
- libdw-dev
36+
- binutils-dev
37+
after_success: |
38+
[ $TRAVIS_BRANCH = master ] &&
39+
[ $TRAVIS_PULL_REQUEST = false ] &&
40+
[ $TRAVIS_RUST_VERSION = nightly ] &&
41+
echo '<meta http-equiv=refresh content=0;url=regex/index.html>' > target/doc/index.html &&
42+
pip install ghp-import --user $USER &&
43+
$HOME/.local/bin/ghp-import -n target/doc &&
44+
git push -qf https://${TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages &&
45+
wget https://github.com/SimonKagstrom/kcov/archive/master.tar.gz &&
46+
tar xzf master.tar.gz && mkdir kcov-master/build && cd kcov-master/build && cmake .. && make && make install DESTDIR=../tmp && cd ../.. &&
47+
PATH="./kcov-master/tmp/usr/local/bin:$PATH" ./run-kcov --coveralls-id $TRAVIS_JOB_ID
48+
env:
49+
global:
50+
secure: VvIrYRW/a8FmBA61hn1bDrqWwR92tANOT6PCeLYd9A9ViQrN07PE6uGsnd9iwr8itck10Ctl1mThZYUkK8BDFEmlvSxOFJ/Po5eRe6A1CYuFF40zizJ+3NllVkN20kwoQDe0kxwZVDGO9Yi1icHUrbRPWDfS+1tfobO/UT8Dlng=
51+
notifications:
52+
email:
53+
on_success: never

regex-0.1.80/030-compile_one.diff

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
diff --git a/regex-0.1.80/src/compile.rs b/regex-0.1.80/src/compile.rs
2+
index 9db743f..ef1948e 100644
3+
--- a/regex-0.1.80/src/compile.rs
4+
+++ b/regex-0.1.80/src/compile.rs
5+
@@ -137,6 +137,8 @@ impl Compiler {
6+
}
7+
8+
fn compile_one(mut self, expr: &Expr) -> result::Result<Program, Error> {
9+
+ {} // @030
10+
+
11+
// If we're compiling a forward DFA and we aren't anchored, then
12+
// add a `.*?` before the first capture group.
13+
// Other matching engines handle this by baking the logic into the
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
diff --git a/regex-0.1.80/src/expand.rs b/regex-0.1.80/src/expand.rs
2+
index 9bea703..3b6ae94 100644
3+
--- a/regex-0.1.80/src/expand.rs
4+
+++ b/regex-0.1.80/src/expand.rs
5+
@@ -84,6 +84,7 @@ fn find_cap_ref(mut replacement: &[u8]) -> Option<CaptureRef> {
6+
}
7+
8+
fn is_valid_cap_letter(b: &u8) -> bool {
9+
+ { }
10+
match *b {
11+
b'0' ... b'9' | b'a' ... b'z' | b'A' ... b'Z' | b'_' => true,
12+
_ => false,

regex-0.1.80/050-expand.diff

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
diff --git a/regex-0.1.80/src/expand.rs b/regex-0.1.80/src/expand.rs
2+
index 9bea703..a28b82d 100644
3+
--- a/regex-0.1.80/src/expand.rs
4+
+++ b/regex-0.1.80/src/expand.rs
5+
@@ -5,6 +5,7 @@ use memchr::memchr;
6+
use bytes::Captures;
7+
8+
pub fn expand(caps: &Captures, mut replacement: &[u8], dst: &mut Vec<u8>) {
9+
+ { }
10+
while !replacement.is_empty() {
11+
match memchr(b'$', replacement) {
12+
None => break,

regex-0.1.80/060-Compiler-new.diff

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
diff --git a/regex-0.1.80/src/compile.rs b/regex-0.1.80/src/compile.rs
2+
index 9db743f..fb812ae 100644
3+
--- a/regex-0.1.80/src/compile.rs
4+
+++ b/regex-0.1.80/src/compile.rs
5+
@@ -54,6 +54,7 @@ impl Compiler {
6+
///
7+
/// Various options can be set before calling `compile` on an expression.
8+
pub fn new() -> Self {
9+
+ {}
10+
Compiler {
11+
insts: vec![],
12+
compiled: Program::new(),

regex-0.1.80/060-reverse.diff

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
diff --git a/regex-0.1.80/src/compile.rs b/regex-0.1.80/src/compile.rs
2+
index 9db743f..4e56c2d 100644
3+
--- a/regex-0.1.80/src/compile.rs
4+
+++ b/regex-0.1.80/src/compile.rs
5+
@@ -114,6 +114,7 @@ impl Compiler {
6+
/// When set, the machine returned is suitable for matching text in
7+
/// reverse. In particular, all concatenations are flipped.
8+
pub fn reverse(mut self, yes: bool) -> Self {
9+
+ {}
10+
self.compiled.is_reverse = yes;
11+
self
12+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
diff --git a/regex-0.1.80/src/freqs.rs b/regex-0.1.80/src/freqs.rs
2+
index 92bafc1..6eb5799 100644
3+
--- a/regex-0.1.80/src/freqs.rs
4+
+++ b/regex-0.1.80/src/freqs.rs
5+
@@ -12,7 +12,7 @@
6+
// edit directly
7+
8+
pub const BYTE_FREQUENCIES: [u8; 256] = [
9+
- 55, // '\x00'
10+
+ 54+1, // '\x00'
11+
52, // '\x01'
12+
51, // '\x02'
13+
50, // '\x03'

regex-0.1.80/080-SparseSet.diff

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
diff --git a/regex-0.1.80/src/sparse.rs b/regex-0.1.80/src/sparse.rs
2+
index 34c05e7..ef5188e 100644
3+
--- a/regex-0.1.80/src/sparse.rs
4+
+++ b/regex-0.1.80/src/sparse.rs
5+
@@ -16,13 +16,13 @@ pub struct SparseSet {
6+
/// Dense contains the instruction pointers in the order in which they
7+
/// were inserted. Accessing elements >= self.size is illegal.
8+
dense: Vec<usize>,
9+
+ /// The number of elements in the set.
10+
+ size: usize,
11+
/// Sparse maps instruction pointers to their location in dense.
12+
///
13+
/// An instruction pointer is in the set if and only if
14+
/// sparse[ip] < size && ip == dense[sparse[ip]].
15+
sparse: Vec<usize>,
16+
- /// The number of elements in the set.
17+
- size: usize,
18+
}
19+
20+
impl SparseSet {

regex-0.1.80/090-Job.diff

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
diff --git a/regex-0.1.80/src/backtrack.rs b/regex-0.1.80/src/backtrack.rs
2+
index 3c06254..4b72fd4 100644
3+
--- a/regex-0.1.80/src/backtrack.rs
4+
+++ b/regex-0.1.80/src/backtrack.rs
5+
@@ -82,8 +82,8 @@ impl Cache {
6+
/// stack to do it.
7+
#[derive(Clone, Copy, Debug)]
8+
enum Job {
9+
- Inst { ip: InstPtr, at: InputAt },
10+
SaveRestore { slot: usize, old_pos: Option<usize> },
11+
+ Inst { ip: InstPtr, at: InputAt },
12+
}
13+
14+
impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {

regex-0.1.80/CHANGELOG.md

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
0.1.80
2+
======
3+
* [PR #292](https://github.com/rust-lang-nursery/regex/pull/292):
4+
Fixes bug #291, which was introduced by PR #290.
5+
6+
0.1.79
7+
======
8+
* Require regex-syntax 0.3.8.
9+
10+
0.1.78
11+
======
12+
* [PR #290](https://github.com/rust-lang-nursery/regex/pull/290):
13+
Fixes bug #289, which caused some regexes with a certain combination
14+
of literals to match incorrectly.
15+
16+
0.1.77
17+
======
18+
* [PR #281](https://github.com/rust-lang-nursery/regex/pull/281):
19+
Fixes bug #280 by disabling all literal optimizations when a pattern
20+
is partially anchored.
21+
22+
0.1.76
23+
======
24+
* Tweak criteria for using the Teddy literal matcher.
25+
26+
0.1.75
27+
======
28+
* [PR #275](https://github.com/rust-lang-nursery/regex/pull/275):
29+
Improves match verification performance in the Teddy SIMD searcher.
30+
* [PR #278](https://github.com/rust-lang-nursery/regex/pull/278):
31+
Replaces slow substring loop in the Teddy SIMD searcher with Aho-Corasick.
32+
* Implemented DoubleEndedIterator on regex set match iterators.
33+
34+
0.1.74
35+
======
36+
* Release regex-syntax 0.3.5 with a minor bug fix.
37+
* Fix bug #272.
38+
* Fix bug #277.
39+
* [PR #270](https://github.com/rust-lang-nursery/regex/pull/270):
40+
Fixes bugs #264, #268 and an unreported bug where the DFA cache size could be
41+
drastically underestimated in some cases (leading to unexpectedly high memory
42+
usage).
43+
44+
0.1.73
45+
======
46+
* Release `regex-syntax 0.3.4`.
47+
* Bump `regex-syntax` dependency version for `regex` to `0.3.4`.
48+
49+
0.1.72
50+
======
51+
* [PR #262](https://github.com/rust-lang-nursery/regex/pull/262):
52+
Fixes a number of small bugs caught by fuzz testing (AFL).
53+
54+
0.1.71
55+
======
56+
* [PR #236](https://github.com/rust-lang-nursery/regex/pull/236):
57+
Fix a bug in how suffix literals were extracted, which could lead
58+
to invalid match behavior in some cases.
59+
60+
0.1.70
61+
======
62+
* [PR #231](https://github.com/rust-lang-nursery/regex/pull/231):
63+
Add SIMD accelerated multiple pattern search.
64+
* [PR #228](https://github.com/rust-lang-nursery/regex/pull/228):
65+
Reintroduce the reverse suffix literal optimization.
66+
* [PR #226](https://github.com/rust-lang-nursery/regex/pull/226):
67+
Implements NFA state compression in the lazy DFA.
68+
* [PR #223](https://github.com/rust-lang-nursery/regex/pull/223):
69+
A fully anchored RegexSet can now short-circuit.
70+
71+
0.1.69
72+
======
73+
* [PR #216](https://github.com/rust-lang-nursery/regex/pull/216):
74+
Tweak the threshold for running backtracking.
75+
* [PR #217](https://github.com/rust-lang-nursery/regex/pull/217):
76+
Add upper limit (from the DFA) to capture search (for the NFA).
77+
* [PR #218](https://github.com/rust-lang-nursery/regex/pull/218):
78+
Add rure, a C API.
79+
80+
0.1.68
81+
======
82+
* [PR #210](https://github.com/rust-lang-nursery/regex/pull/210):
83+
Fixed a performance bug in `bytes::Regex::replace` where `extend` was used
84+
instead of `extend_from_slice`.
85+
* [PR #211](https://github.com/rust-lang-nursery/regex/pull/211):
86+
Fixed a bug in the handling of word boundaries in the DFA.
87+
* [PR #213](https://github.com/rust-lang-nursery/regex/pull/213):
88+
Added RE2 and Tcl to the benchmark harness. Also added a CLI utility for
89+
running regexes using any of the following regex engines: PCRE1, PCRE2,
90+
Oniguruma, RE2, Tcl and of course Rust's own regexes.
91+
92+
0.1.67
93+
======
94+
* [PR #201](https://github.com/rust-lang-nursery/regex/pull/201):
95+
Fix undefined behavior in the `regex!` compiler plugin macro.
96+
* [PR #205](https://github.com/rust-lang-nursery/regex/pull/205):
97+
More improvements to DFA performance. Competitive with RE2. See PR for
98+
benchmarks.
99+
* [PR #209](https://github.com/rust-lang-nursery/regex/pull/209):
100+
Release 0.1.66 was semver incompatible since it required a newer version
101+
of Rust than previous releases. This PR fixes that. (And `0.1.66` was
102+
yanked.)
103+
104+
0.1.66
105+
======
106+
* Speculative support for Unicode word boundaries was added to the DFA. This
107+
should remove the last common case that disqualified use of the DFA.
108+
* An optimization that scanned for suffix literals and then matched the regular
109+
expression in reverse was removed because it had worst case quadratic time
110+
complexity. It was replaced with a more limited optimization where, given any
111+
regex of the form `re$`, it will be matched in reverse from the end of the
112+
haystack.
113+
* [PR #202](https://github.com/rust-lang-nursery/regex/pull/202):
114+
The inner loop of the DFA was heavily optimized to improve cache locality
115+
and reduce the overall number of instructions run on each iteration. This
116+
represents the first use of `unsafe` in `regex` (to elide bounds checks).
117+
* [PR #200](https://github.com/rust-lang-nursery/regex/pull/200):
118+
Use of the `mempool` crate (which used thread local storage) was replaced
119+
with a faster version of a similar API in @Amanieu's `thread_local` crate.
120+
It should reduce contention when using a regex from multiple threads
121+
simultaneously.
122+
* PCRE2 JIT benchmarks were added. A benchmark comparison can be found
123+
[here](https://gist.github.com/anonymous/14683c01993e91689f7206a18675901b).
124+
(Includes a comparison with PCRE1's JIT and Oniguruma.)
125+
* A bug where word boundaries weren't being matched correctly in the DFA was
126+
fixed. This only affected use of `bytes::Regex`.
127+
* [#160](https://github.com/rust-lang-nursery/regex/issues/160):
128+
`Captures` now has a `Debug` impl.

regex-0.1.80/Cargo.toml

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
[package]
2+
name = "regex"
3+
version = "0.1.80" #:version
4+
authors = ["The Rust Project Developers"]
5+
license = "MIT/Apache-2.0"
6+
readme = "README.md"
7+
repository = "https://github.com/rust-lang/regex"
8+
documentation = "https://doc.rust-lang.org/regex"
9+
homepage = "https://github.com/rust-lang/regex"
10+
description = """
11+
An implementation of regular expressions for Rust. This implementation uses
12+
finite automata and guarantees linear time matching on all inputs.
13+
"""
14+
15+
[dependencies]
16+
# For very fast prefix literal matching.
17+
aho-corasick = "0.5.3"
18+
# For skipping along search text quickly when a leading byte is known.
19+
memchr = "0.1.9"
20+
# For managing regex caches quickly across multiple threads.
21+
thread_local = "0.2.4"
22+
# For parsing regular expressions.
23+
regex-syntax = { path = "regex-syntax", version = "0.3.8" }
24+
# For accelerating text search.
25+
simd = { version = "0.1.0", optional = true }
26+
# For compiling UTF-8 decoding into automata.
27+
utf8-ranges = "0.1.3"
28+
29+
[dev-dependencies]
30+
# For examples.
31+
lazy_static = "0.1"
32+
# For property based tests.
33+
quickcheck = "0.2"
34+
# For generating random test data.
35+
rand = "0.3"
36+
37+
[features]
38+
# Enable to use the unstable pattern traits defined in std.
39+
pattern = []
40+
# Enable to use simd acceleration.
41+
simd-accel = ["simd"]
42+
43+
[lib]
44+
# There are no benchmarks in the library code itself
45+
bench = false
46+
47+
# Run the test suite on the default behavior of Regex::new.
48+
# This includes a mish mash of NFAs and DFAs, which are chosen automatically
49+
# based on the regex. We test both of the NFA implementations by forcing their
50+
# usage with the test definitions below. (We can't test the DFA implementations
51+
# in the same way since they can't be used for every regex tested.)
52+
[[test]]
53+
path = "tests/test_default.rs"
54+
name = "default"
55+
56+
# The same as the default tests, but run on bytes::Regex.
57+
[[test]]
58+
path = "tests/test_default_bytes.rs"
59+
name = "default-bytes"
60+
61+
# Run the test suite on the NFA algorithm over Unicode codepoints.
62+
[[test]]
63+
path = "tests/test_nfa.rs"
64+
name = "nfa"
65+
66+
# Run the test suite on the NFA algorithm over bytes that match UTF-8 only.
67+
[[test]]
68+
path = "tests/test_nfa_utf8bytes.rs"
69+
name = "nfa-utf8bytes"
70+
71+
# Run the test suite on the NFA algorithm over arbitrary bytes.
72+
[[test]]
73+
path = "tests/test_nfa_bytes.rs"
74+
name = "nfa-bytes"
75+
76+
# Run the test suite on the backtracking engine over Unicode codepoints.
77+
[[test]]
78+
path = "tests/test_backtrack.rs"
79+
name = "backtrack"
80+
81+
# Run the test suite on the backtracking engine over bytes that match UTF-8
82+
# only.
83+
[[test]]
84+
path = "tests/test_backtrack_utf8bytes.rs"
85+
name = "backtrack-utf8bytes"
86+
87+
# Run the test suite on the backtracking engine over arbitrary bytes.
88+
[[test]]
89+
path = "tests/test_backtrack_bytes.rs"
90+
name = "backtrack-bytes"
91+
92+
[profile.test]
93+
debug = true

0 commit comments

Comments
 (0)