-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathRdScriptEditorTokens.l
237 lines (210 loc) · 5.77 KB
/
RdScriptEditorTokens.l
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
%{
/*
* R.app : a Cocoa front end to: "R A Computer Language for Statistical Data Analysis"
*
* R.app Copyright notes:
* Copyright (C) 2004-5 The R Foundation
* written by Stefano M. Iacus and Simon Urbanek
*
*
* R Copyright notes:
* Copyright (C) 1995-1996 Robert Gentleman and Ross Ihaka
* Copyright (C) 1998-2001 The R Development Core Team
* Copyright (C) 2002-2004 The R Foundation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* A copy of the GNU General Public License is available via WWW at
* http://www.gnu.org/copyleft/gpl.html. You can also obtain it by
* writing to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA.
*
* RdScriptEditorTokens.l
*
* Created by Hans-J. Bibiko on 09/01/2012.
*
* Flex parser for syntax highlighting Rd code.
*
*/
#import "RdScriptEditorTokens.h"
/* Counts the UTF-8 characters (not bytes) of a NUL-terminated string; the definition is in the user-code section at the end of this file. */
size_t utf8strlenRd(const char * _s);
/* Character-based offset and length of the current token; both are defined outside this file and updated by YY_USER_ACTION below. */
extern size_t yyuoffset, yyuleng;
//keep track of the current utf-8 character (not byte) offset and token length:
//flex runs YY_USER_ACTION before the action of each matched rule, so the offset first
//advances past the previous token and the length is then recomputed from yytext
#define YY_USER_ACTION { yyuoffset += yyuleng; yyuleng = utf8strlenRd(yytext); }
//ignore the output of unmatched characters: ECHO is what the flex default rule executes,
//and defining it empty keeps unmatched text from being written out
#define ECHO {}
%}
/* Scanner options: generated symbols use the prefix rd in place of the default
 * yy, no yywrap() function is required at end of input, unput() is not
 * generated, and patterns are matched case-sensitively. */
%option prefix="rd"
%option noyywrap
%option nounput
%option case-sensitive
/* s: a run of one or more whitespace characters (space, tab, newline or
 * carriage return). */
s [ \t\n\r]+
/* break: one character outside the class of ASCII letters, digits, underscore
 * and the non-ASCII range given in the class; used as trailing context after
 * the argument-less macros in the rules section. */
break [^a-zA-Z_0-9À-゚]
/* section: a backslash followed by one of the Rd section names. The names are
 * factored by common prefix; the flat list is in the comment at the end of
 * this file. */
section \\(s(ynopsis|ource|ubsection|e(ction|ealso))|Rd(Opts|version)|n(ote|ame)|concept|title|Sexpr|d(ocType|e(scription|tails))|usage|e(ncoding|xamples)|value|keyword|format|a(uthor|lias|rguments)|references)
/* macrowarg: a backslash followed by the name of a macro that takes
 * arguments (same prefix factoring as above). */
macrowarg \\(s(trong|pecial|amp|Quote)|href|newcommand|c(ite|o(de|mmand))|t(estonly|abular)|i(tem(ize)?|f(else)?)|S(3method|4method)|o(ut|ption)|d(ont(show|test|run)|e(scribe|qn)|fn|Quote)|CRANpkg|url|p(kg|reformatted)|e(n(c|d|umerate|v)|qn|m(ph|ail))|v(erb|ar)|kbd|fi(le|gure)|link(S4class)?|acronym|renewcommand|method|b(old|egin))
/* macrowoarg: a backslash followed by the name of a macro used without
 * arguments. Note that item appears in both lists. */
macrowoarg \\(R|cr|tab|item|dots|l(dots|eft)|right|ge)
/* macrogen: a backslash followed by one or more ASCII letters, digits or
 * underscores; the catch-all for macro names not listed above. */
macrogen \\[a-zA-Z0-9_]+
/* verbatim: exclusive start condition. While it is active only the rules
 * tagged with it apply, plus the EOF rule, which carries no start condition. */
%x verbatim
%%
	/* All active rules compete for each position: the longest match wins and
	 * ties go to the rule listed first, so the order below is significant.
	 * Rules whose action is only a comment consume their text without
	 * returning a token. */
\\(%) /* skip an escaped percent sign so that it does not start a comment */
\\(\\) /* skip an escaped backslash so that it does not start a macro */
%[^\n\r]*(\n|\r)? { return RDPT_COMMENT; } /* comment: percent sign up to and including at most one line-ending character */
^#ifn?def/[ \t] { return RDPT_DIRECTIVE; } /* #ifdef or #ifndef at line start, followed by a space or tab (not consumed) */
^#endif/[ \t\n\r] { return RDPT_DIRECTIVE; } /* #endif at line start, followed by whitespace (not consumed) */
\\verb/\{ { BEGIN(verbatim); return RDPT_MACRO_ARG; } /* \verb before an opening brace: enter the verbatim state; listed ahead of the macrowarg rule, which matches the same text */
<verbatim>[^\}] /* skip any single character other than a closing brace, including the opening brace left unconsumed above */
<verbatim>\\\} /* skip an escaped closing brace so that it does not end the verbatim state */
<verbatim>\\(%) /* skip an escaped percent sign so that it does not start a comment */
<verbatim>%[^\n\r]*(\n|\r)? { return RDPT_COMMENT; } /* % sign is valid inside \verb{} */
<verbatim>\} { BEGIN(INITIAL); } /* verbatim end: back to the initial state, no token returned */
{section}/\{ { return RDPT_SECTION; } /* section macros, only when followed by an opening brace */
\\Sexpr/\[ { return RDPT_SECTION; } /* \Sexpr followed by an option list in square brackets */
{macrowarg}/\{ { return RDPT_MACRO_ARG; } /* macros with arguments, only when followed by an opening brace */
\\link/\[ { return RDPT_MACRO_ARG; } /* \link followed by an option list in square brackets */
{macrowoarg}/(\\|{s}|%|{break}) { return RDPT_MACRO_ARG; } /* macros without arguments, followed by a backslash, whitespace, a percent sign or a break character */
{macrogen} { return RDPT_MACRO_GEN; } /* unknown macros: any other backslash-name */
. { return RDPT_OTHER; } /* any other single character except a newline */
<<EOF>> {
/* no start condition is given, so this rule also applies in the verbatim state */
BEGIN(INITIAL); /* make sure we return to initial state when finished! */
yy_delete_buffer(YY_CURRENT_BUFFER); /* release the buffer that was being scanned */
return 0; /* 0 is the conventional end-of-input result of the scanner function */
}
%%
/*
 * Count the UTF-8 encoded characters (code points) in a NUL-terminated string.
 *
 * Every byte that is not a continuation byte (bit pattern 10xxxxxx) starts a
 * new character, so the result is the number of such bytes. The input is not
 * validated: a stray continuation byte is not counted and a truncated
 * sequence counts as one character.
 *
 * _s       NUL-terminated byte string; NULL is treated as an empty string.
 * returns  the number of characters, never more than the byte length.
 *
 * An earlier version processed the string one machine word at a time. That
 * required reading a char buffer through a size_t pointer (a strict-aliasing
 * violation), loading bytes located after the terminator, and a compiler
 * specific prefetch builtin. The strings passed in here are scanner tokens,
 * so the plain byte loop below is used instead; it yields the same counts.
 */
size_t utf8strlenRd(const char * _s)
{
    const unsigned char *p;
    size_t chars = 0;

    if (_s == NULL) {
        return 0;
    }

    /* Inspect raw bytes as unsigned so the bit test is independent of the
       signedness of plain char. */
    for (p = (const unsigned char *)_s; *p != '\0'; p++) {
        /* Top two bits 10 mark a continuation byte; everything else is the
           first byte of a character. */
        if ((*p & 0xC0) != 0x80) {
            chars++;
        }
    }

    return chars;
}
/*
section:
RdOpts
Rdversion
Sexpr
alias
arguments
author
concept
description
details
docType
encoding
examples
format
keyword
name
note
references
section
seealso
source
subsection
synopsis
title
usage
value
macros with argument:
CRANpkg
S3method
S4method
acronym
begin
bold
cite
code
command
dQuote
deqn
describe
dfn
dontrun
dontshow
donttest
email
emph
enc
end
enumerate
env
eqn
file
figure
href
if
ifelse
item
itemize
kbd
link
linkS4class
method
newcommand
option
out
pkg
preformatted
renewcommand
sQuote
samp
special
strong
tabular
testonly
url
var
verb
macro without argument:
R
cr
dots
ge
item
ldots
left
right
tab
*/