Skip to content

Commit 4e60f7e

Browse files
committed
First commit
0 parents  commit 4e60f7e

15 files changed

+556
-0
lines changed

.gitignore

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
2+
#********** osx template**********
3+
4+
.DS_Store
5+
6+
# Thumbnails
7+
._*
8+
9+
# Files that might appear on external disk
10+
.Spotlight-V100
11+
.Trashes
12+
13+
14+
#********** linux template**********
15+
16+
.*
17+
!.gitignore
18+
*~
19+
20+
# KDE
21+
.directory
22+
23+
24+
#********** windows template**********
25+
26+
# Windows image file caches
27+
Thumbs.db
28+
29+
# Folder config file
30+
Desktop.ini
31+
32+
# Recycle Bin used on file shares
33+
$RECYCLE.BIN/
34+
35+
36+
#********** ruby template**********
37+
38+
*.gem
39+
*.rbc
40+
.bundle
41+
.config
42+
coverage
43+
InstalledFiles
44+
lib/bundler/man
45+
pkg
46+
rdoc
47+
spec/reports
48+
test/tmp
49+
test/version_tmp
50+
tmp
51+
52+
# YARD artifacts
53+
.yardoc
54+
_yardoc
55+
doc/
56+

Rakefile

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
require 'rake/testtask'
2+
3+
# To run one test: rake test TEST=just_one_file.rb
4+
# Registers the test task: every file matching test/*_test.rb is run, with the
# test directory added to the load path.
Rake::TestTask.new do |test_task|
  test_task.libs << 'test'
  test_task.test_files = FileList['test/*_test.rb']
end
8+
9+
# Returns the file name of the most recently built erb_parser gem in the
# current directory, or nil when no built gem is present.
#
# The newest file (by modification time) is chosen explicitly: simply taking
# the first glob match would pick an arbitrary -- or, when the results are
# sorted, the lowest-named -- file if several builds are lying around.
def built_gem_name
  Dir.glob('erb_parser-*.*.*.gem').max_by { |path| File.mtime(path) }
end
12+
13+
# Removes any previously built gems, then builds a fresh one from the gemspec.
task :build do
  # rm_f on a glob result is harmless when nothing matches, unlike shelling
  # out to `rm *.gem`, which fails when there is no gem to delete.
  rm_f Dir.glob('*.gem')
  # sh raises when the command exits non-zero, so a failed build stops rake
  # instead of being silently ignored.
  sh 'gem', 'build', 'erb_parser.gemspec'
end

# Installs the built gem into the local RubyGems environment.
task :install do
  gem_file = built_gem_name
  # Without this guard a missing gem would run a bare `gem install`.
  abort 'No built gem found. Run `rake build` first.' unless gem_file
  sh 'gem', 'install', gem_file
end

# Pushes the built gem to the configured gem server.
task :release do
  gem_file = built_gem_name
  # Without this guard a missing gem would run a bare `gem push`.
  abort 'No built gem found. Run `rake build` first.' unless gem_file
  sh 'gem', 'push', gem_file
end

erb_parser.gemspec

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Gem specification for erb_parser: identity, authorship, packaged files and
# dependencies.
Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'erb_parser'
  spec.version     = '0.0.0'
  spec.date        = '2014-02-26'
  spec.summary     = 'Parser for ERB templates'
  spec.description = 'Parses ERB templates into two types of tokens: Plain text and ERB tags. Special support for HTML/XML.'

  # Authorship
  spec.authors  = ['Jarrett Colby']
  spec.email    = '[email protected]'

  # Everything under lib/ is packaged.
  spec.files    = Dir.glob('lib/**/*')
  spec.homepage = 'http://madebyhq.com/'

  # Needed at runtime to load and run the grammar.
  spec.add_runtime_dependency 'treetop'

  # Needed only for running the test suite.
  spec.add_development_dependency 'minitest'
  spec.add_development_dependency 'turn'
end

lib/erb_parser.rb

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
require 'treetop'
2+
require 'erb_parser/nodes'
3+
require 'erb_parser/treetop_runner'
4+
require 'erb_parser/parsed_erb'
5+
require 'erb_parser/erb_tag'
6+
require 'erb_parser/xml_transformer'
7+
8+
module ErbParser
  class << self
    # Parses +str+ with TreetopRunner and wraps the result in a ParsedErb.
    def parse(str)
      ast = TreetopRunner.run(str)
      ParsedErb.new(ast)
    end

    # Takes a string representing an XML document or fragment, finds every ERB tag in it
    # and replaces each one with an <erb> element whose content is the tag's inner Ruby
    # code, escaped for XML. The element name can be overridden:
    #
    #   ErbParser.transform_xml str, :tag => 'tag-name'
    #
    # Tags of the form +<%=+ get the attribute +interpolated="true"+; tags of the form
    # +<%#+ get the attribute +comment="true"+. Either attribute can be replaced or
    # switched off:
    #
    #   ErbParser.transform_xml str, :interp_attr => {'attr-name' => 'attr-value'}
    #   ErbParser.transform_xml str, :interp_attr => false
    #
    #   ErbParser.transform_xml str, :comment_attr => {'attr-name' => 'attr-value'}
    #   ErbParser.transform_xml str, :comment_attr => false
    #
    # Returns a string representing the transformed XML document or fragment.
    def transform_xml(str, options = {})
      parsed = parse(str)
      XmlTransformer.transform(parsed, options)
    end
  end
end

lib/erb_parser/erb_grammar.treetop

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
module ErbParser
  grammar ErbGrammar
    # A template is any sequence of ERB tags and plain-text runs.
    rule document
      (erb_tag / text)*
    end

    # Plain text: one or more characters, stopping before the next '<%'.
    rule text
      (!'<%' .)+
      <Text>
    end

    # An ERB tag: '<%', an optional '#' and/or '=', the Ruby code, then '%>'.
    rule erb_tag
      '<%'
      number_sign:'#'? equal_sign:'='?
      _ruby_code:ruby_code
      '%>'
      <ErbTag>
    end

    # The Ruby code inside a tag. String literals are consumed as a unit, so a '%>' that
    # appears inside a string does not terminate the tag.
    rule ruby_code
      (string_literal / (!'%>' .))*
    end

    # Matches the following quote styles:
    #   "string"
    #   'string'
    #   %q(string (string) string)
    #   %Q(string (string) string)
    #   %(string (string) string)
    #   %q{string {string} string}
    #   %Q{string {string} string}
    #   %{string {string} string}
    #
    # Inside every style a backslash escapes the character that follows it, so an escaped
    # delimiter or an escaped backslash never opens or closes a literal.
    rule string_literal
      ('"' (escaped_char / !'"' .)* '"') /
      ('\'' (escaped_char / !'\'' .)* '\'') /
      ('%' ('q' / 'Q')? (curly_brackets / parens))
    end

    # Balanced curly brackets; nesting is allowed.
    rule curly_brackets
      '{' (escaped_char / curly_brackets / !'}' .)* '}'
    end

    # Balanced parentheses; nesting is allowed.
    rule parens
      '(' (escaped_char / parens / !')' .)* ')'
    end

    # A backslash followed by any single character. Consuming the pair as a unit makes
    # sequences such as \\ followed by a closing quote, or an escaped opening bracket,
    # tokenize correctly.
    rule escaped_char
      '\\' .
    end
  end
end

lib/erb_parser/erb_tag.rb

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
module ErbParser
  # Wraps a Treetop node for a single ERB tag and forwards every query to it.
  class ErbTag
    # treetop_node is the node that all other methods delegate to.
    def initialize(treetop_node)
      @treetop_node = treetop_node
    end

    # Result of the wrapped node's comment? check.
    def comment?
      @treetop_node.comment?
    end

    # Result of the wrapped node's interpolated? check.
    def interpolated?
      @treetop_node.interpolated?
    end

    # The Ruby code reported by the wrapped node.
    def ruby_code
      @treetop_node.ruby_code
    end

    # The wrapped node's full text value.
    def to_s
      @treetop_node.text_value
    end
  end
end

lib/erb_parser/nodes.rb

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
module ErbParser
  module ErbGrammar
    # Mixin for plain-text nodes (named by the grammar's <Text> annotation).
    module Text
      # Identifies the node as plain text.
      def type
        :text
      end
    end

    # Mixin for ERB tag nodes (named by the grammar's <ErbTag> annotation). Relies on the
    # number_sign, equal_sign and _ruby_code accessors of the node it is mixed into.
    module ErbTag
      # Identifies the node as an ERB tag.
      def type
        :erb_tag
      end

      # True when the number_sign part of the tag is not empty.
      def comment?
        !number_sign.empty?
      end

      # True when the equal_sign part of the tag is not empty.
      def interpolated?
        !equal_sign.empty?
      end

      # Text value of the tag's _ruby_code part.
      def ruby_code
        _ruby_code.text_value
      end
    end
  end
end

lib/erb_parser/parsed_erb.rb

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
module ErbParser
  # Result of parsing a template: the raw Treetop AST plus a flat token list derived
  # from its elements.
  class ParsedErb
    # The array of parsed tokens. Each entry is either a String (plain text) or an ErbTag.
    attr_reader :tokens

    # The raw Treetop AST this object was built from.
    attr_reader :treetop_ast

    # Builds the token list from treetop_ast.elements. Raises a RuntimeError when an
    # element reports a type other than :text or :erb_tag.
    def initialize(treetop_ast)
      @treetop_ast = treetop_ast
      @tokens = treetop_ast.elements.map { |node| token_for(node) }
    end

    # Array-style access to the parsed tokens.
    def [](index)
      tokens[index]
    end

    private

    # Converts one AST element into its token: the text itself for plain text, or an
    # ErbTag wrapper for an ERB tag.
    def token_for(node)
      case node.type
      when :text then node.text_value
      when :erb_tag then ErbTag.new(node)
      else raise "Unexpected type: #{node.type}"
      end
    end
  end
end

lib/erb_parser/treetop_runner.rb

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
Treetop.load File.join(File.dirname(__FILE__), 'erb_grammar')
2+
3+
module ErbParser
  # Boilerplate for invoking the Treetop-generated parser. The result is whatever
  # Treetop returns.
  module TreetopRunner
    # Raised by run when the parser returns no result; the message is the parser's
    # failure reason.
    class ParseError < RuntimeError; end

    # Parses str with a fresh ErbGrammarParser, passing options straight through.
    # Returns the parse result, or raises ParseError when there is none.
    def self.run(str, options = {})
      parser = ErbGrammarParser.new
      parser.parse(str, options) || raise(ParseError, parser.failure_reason)
    end
  end
end

lib/erb_parser/xml_transformer.rb

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
require 'cgi'
2+
3+
module ErbParser
  # Turns a parsed template into XML-friendly text by replacing every ERB tag with an
  # element that holds the tag's escaped Ruby code.
  module XmlTransformer
    # Options used when the caller does not override them.
    DEFAULT_OPTIONS = {
      :tag => 'erb',
      :interp_attr => {'interpolated' => 'true'},
      :comment_attr => {'comment' => 'true'}
    }.freeze

    # Returns the transformed document as a single string.
    #
    # parsed_erb:: object whose +tokens+ are Strings (copied through unchanged) and
    #              ErbTags (replaced by an element).
    # options::    Hash that may override :tag, :interp_attr and :comment_attr. A non-Hash
    #              value for either *_attr option (e.g. false) suppresses that attribute.
    #
    # Raises a RuntimeError for a token that is neither a String nor an ErbTag.
    def self.transform(parsed_erb, options)
      options = DEFAULT_OPTIONS.merge(options)

      parsed_erb.tokens.map do |elem|
        case elem
        when String
          elem
        when ErbTag
          content_tag options[:tag], CGI.escape_html(elem.ruby_code), attrs_for(elem, options)
        else
          raise "Unexpected element: #{elem.class.name}"
        end
      end.join
    end

    # Builds <name attrs>contents</name>. Attribute keys and values are HTML-escaped;
    # name and contents are inserted as given.
    def self.content_tag(name, contents, attrs = {})
      attrs_str = attrs.map do |key, val|
        %Q( #{CGI.escape_html(key.to_s)}="#{CGI.escape_html(val.to_s)}")
      end.join
      "<#{name}#{attrs_str}>#{contents}</#{name}>"
    end

    # Picks the attribute hash for one ERB tag.
    #
    # comment? is checked before interpolated? because a tag such as "<%#= x %>" sets
    # both flags, and ERB treats anything starting with "<%#" as a comment.
    def self.attrs_for(elem, options)
      attrs =
        if elem.comment?
          options[:comment_attr]
        elsif elem.interpolated?
          options[:interp_attr]
        end
      attrs.is_a?(Hash) ? attrs : {}
    end
    private_class_method :attrs_for
  end
end

readme.md

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
## Can ErbParser handle all valid Ruby code?
2+
3+
No, it cannot. Ruby has a very complex syntax. In a library like this, it would be a fool's
4+
errand to try to handle every weird syntactic construct that could technically be
5+
considered valid Ruby. Instead, this library is designed to handle only the constructs
6+
that would commonly appear inside ERB tags. In other words, the basics of the language.
7+
8+
Just avoid exotic syntactic constructs, and you should be fine. (You shouldn't do anything
9+
syntactically fancy in an ERB template anyway--it's bad coding style.) In particular, you
10+
must avoid Ruby's weirder string literals, such as the following:
11+
12+
%q!This is a valid string literal, but you must not use this syntax.!
13+
14+
Also be wary of tricky escape sequences. If you absolutely must use unusual syntax, and it
15+
breaks ErbParser, consider moving the offending code into a class or module external to
16+
the ERB template.
17+
18+
Nonetheless, the library *does* account for and allow the following string literal
19+
formats:
20+
21+
"string"
22+
'string'
23+
%q(string (string) string)
24+
%Q(string (string) string)
25+
%(string (string) string)
26+
%q{string {string} string}
27+
%Q{string {string} string}
28+
%{string {string} string}
29+
30+
This parser is *not* hardened against malicious input. But then, you shouldn't be
31+
accepting ERB as untrusted input anyway, because ERB allows arbitrary code execution.
32+
33+
## What does ErbParser do with invalid ERB or Ruby code?
34+
35+
If you pass code containing a syntax error, the parsing behavior is undefined. You may get
36+
an exception, or you may just get nonsensical results. It depends on the type of the
37+
syntax error.

0 commit comments

Comments
 (0)