1 |
/* $Header$ */ |
2 |
/* |
3 |
* re_*comp and friends - compile REs |
4 |
* This file #includes several others (see the bottom). |
5 |
* |
6 |
* Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. |
7 |
* |
8 |
* Development of this software was funded, in part, by Cray Research Inc., |
9 |
* UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics |
10 |
* Corporation, none of whom are responsible for the results. The author |
11 |
* thanks all of them. |
12 |
* |
13 |
* Redistribution and use in source and binary forms -- with or without |
14 |
* modification -- are permitted for any purpose, provided that |
15 |
* redistributions in source form retain this entire copyright notice and |
16 |
* indicate the origin and nature of any modifications. |
17 |
* |
18 |
* I'd appreciate being given credit for this package in the documentation |
19 |
* of software which uses it, but that is not a requirement. |
20 |
* |
21 |
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, |
22 |
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY |
23 |
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL |
24 |
* HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
25 |
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
26 |
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; |
27 |
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
28 |
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
29 |
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
30 |
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
31 |
* |
32 |
*/ |
33 |
|
34 |
#include "regguts.h" |
35 |
|
36 |
/* |
37 |
* forward declarations, up here so forward datatypes etc. are defined early |
38 |
*/ |
39 |
/* =====^!^===== begin forwards =====^!^===== */ |
40 |
/* automatically gathered by fwd; do not hand-edit */ |
41 |
/* === regcomp.c === */ |
42 |
int compile _ANSI_ARGS_((regex_t *, CONST chr *, size_t, int)); |
43 |
static VOID moresubs _ANSI_ARGS_((struct vars *, int)); |
44 |
static int freev _ANSI_ARGS_((struct vars *, int)); |
45 |
static VOID makesearch _ANSI_ARGS_((struct vars *, struct nfa *)); |
46 |
static struct subre *parse _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *)); |
47 |
static struct subre *parsebranch _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, int)); |
48 |
static VOID parseqatom _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, struct subre *)); |
49 |
static VOID nonword _ANSI_ARGS_((struct vars *, int, struct state *, struct state *)); |
50 |
static VOID word _ANSI_ARGS_((struct vars *, int, struct state *, struct state *)); |
51 |
static int scannum _ANSI_ARGS_((struct vars *)); |
52 |
static VOID repeat _ANSI_ARGS_((struct vars *, struct state *, struct state *, int, int)); |
53 |
static VOID bracket _ANSI_ARGS_((struct vars *, struct state *, struct state *)); |
54 |
static VOID cbracket _ANSI_ARGS_((struct vars *, struct state *, struct state *)); |
55 |
static VOID brackpart _ANSI_ARGS_((struct vars *, struct state *, struct state *)); |
56 |
static chr *scanplain _ANSI_ARGS_((struct vars *)); |
57 |
static VOID leaders _ANSI_ARGS_((struct vars *, struct cvec *)); |
58 |
static VOID onechr _ANSI_ARGS_((struct vars *, pchr, struct state *, struct state *)); |
59 |
static VOID dovec _ANSI_ARGS_((struct vars *, struct cvec *, struct state *, struct state *)); |
60 |
static celt nextleader _ANSI_ARGS_((struct vars *, pchr, pchr)); |
61 |
static VOID wordchrs _ANSI_ARGS_((struct vars *)); |
62 |
static struct subre *subre _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *)); |
63 |
static VOID freesubre _ANSI_ARGS_((struct vars *, struct subre *)); |
64 |
static VOID freesrnode _ANSI_ARGS_((struct vars *, struct subre *)); |
65 |
static VOID optst _ANSI_ARGS_((struct vars *, struct subre *)); |
66 |
static int numst _ANSI_ARGS_((struct subre *, int)); |
67 |
static VOID markst _ANSI_ARGS_((struct subre *)); |
68 |
static VOID cleanst _ANSI_ARGS_((struct vars *)); |
69 |
static long nfatree _ANSI_ARGS_((struct vars *, struct subre *, FILE *)); |
70 |
static long nfanode _ANSI_ARGS_((struct vars *, struct subre *, FILE *)); |
71 |
static int newlacon _ANSI_ARGS_((struct vars *, struct state *, struct state *, int)); |
72 |
static VOID freelacons _ANSI_ARGS_((struct subre *, int)); |
73 |
static VOID rfree _ANSI_ARGS_((regex_t *)); |
74 |
static VOID dump _ANSI_ARGS_((regex_t *, FILE *)); |
75 |
static VOID dumpst _ANSI_ARGS_((struct subre *, FILE *, int)); |
76 |
static VOID stdump _ANSI_ARGS_((struct subre *, FILE *, int)); |
77 |
static char *stid _ANSI_ARGS_((struct subre *, char *, size_t)); |
78 |
/* === regc_lex.c === */ |
79 |
static VOID lexstart _ANSI_ARGS_((struct vars *)); |
80 |
static VOID prefixes _ANSI_ARGS_((struct vars *)); |
81 |
static VOID lexnest _ANSI_ARGS_((struct vars *, chr *, chr *)); |
82 |
static VOID lexword _ANSI_ARGS_((struct vars *)); |
83 |
static int next _ANSI_ARGS_((struct vars *)); |
84 |
static int lexescape _ANSI_ARGS_((struct vars *)); |
85 |
static chr lexdigits _ANSI_ARGS_((struct vars *, int, int, int)); |
86 |
static int brenext _ANSI_ARGS_((struct vars *, pchr)); |
87 |
static VOID skip _ANSI_ARGS_((struct vars *)); |
88 |
static chr newline _ANSI_ARGS_((NOPARMS)); |
89 |
#ifdef REG_DEBUG |
90 |
static chr *ch _ANSI_ARGS_((NOPARMS)); |
91 |
#endif |
92 |
static chr chrnamed _ANSI_ARGS_((struct vars *, chr *, chr *, pchr)); |
93 |
/* === regc_color.c === */ |
94 |
static VOID initcm _ANSI_ARGS_((struct vars *, struct colormap *)); |
95 |
static VOID freecm _ANSI_ARGS_((struct colormap *)); |
96 |
static VOID cmtreefree _ANSI_ARGS_((struct colormap *, union tree *, int)); |
97 |
static color setcolor _ANSI_ARGS_((struct colormap *, pchr, pcolor)); |
98 |
static color maxcolor _ANSI_ARGS_((struct colormap *)); |
99 |
static color newcolor _ANSI_ARGS_((struct colormap *)); |
100 |
static VOID freecolor _ANSI_ARGS_((struct colormap *, pcolor)); |
101 |
static color pseudocolor _ANSI_ARGS_((struct colormap *)); |
102 |
static color subcolor _ANSI_ARGS_((struct colormap *, pchr c)); |
103 |
static color newsub _ANSI_ARGS_((struct colormap *, pcolor)); |
104 |
static VOID subrange _ANSI_ARGS_((struct vars *, pchr, pchr, struct state *, struct state *)); |
105 |
static VOID subblock _ANSI_ARGS_((struct vars *, pchr, struct state *, struct state *)); |
106 |
static VOID okcolors _ANSI_ARGS_((struct nfa *, struct colormap *)); |
107 |
static VOID colorchain _ANSI_ARGS_((struct colormap *, struct arc *)); |
108 |
static VOID uncolorchain _ANSI_ARGS_((struct colormap *, struct arc *)); |
109 |
static int singleton _ANSI_ARGS_((struct colormap *, pchr c)); |
110 |
static VOID rainbow _ANSI_ARGS_((struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *)); |
111 |
static VOID colorcomplement _ANSI_ARGS_((struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *)); |
112 |
#ifdef REG_DEBUG |
113 |
static VOID dumpcolors _ANSI_ARGS_((struct colormap *, FILE *)); |
114 |
static VOID fillcheck _ANSI_ARGS_((struct colormap *, union tree *, int, FILE *)); |
115 |
static VOID dumpchr _ANSI_ARGS_((pchr, FILE *)); |
116 |
#endif |
117 |
/* === regc_nfa.c === */ |
118 |
static struct nfa *newnfa _ANSI_ARGS_((struct vars *, struct colormap *, struct nfa *)); |
119 |
static VOID freenfa _ANSI_ARGS_((struct nfa *)); |
120 |
static struct state *newstate _ANSI_ARGS_((struct nfa *)); |
121 |
static struct state *newfstate _ANSI_ARGS_((struct nfa *, int flag)); |
122 |
static VOID dropstate _ANSI_ARGS_((struct nfa *, struct state *)); |
123 |
static VOID freestate _ANSI_ARGS_((struct nfa *, struct state *)); |
124 |
static VOID destroystate _ANSI_ARGS_((struct nfa *, struct state *)); |
125 |
static VOID newarc _ANSI_ARGS_((struct nfa *, int, pcolor, struct state *, struct state *)); |
126 |
static struct arc *allocarc _ANSI_ARGS_((struct nfa *, struct state *)); |
127 |
static VOID freearc _ANSI_ARGS_((struct nfa *, struct arc *)); |
128 |
static struct arc *findarc _ANSI_ARGS_((struct state *, int, pcolor)); |
129 |
static VOID cparc _ANSI_ARGS_((struct nfa *, struct arc *, struct state *, struct state *)); |
130 |
static VOID moveins _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); |
131 |
static VOID copyins _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); |
132 |
static VOID moveouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); |
133 |
static VOID copyouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); |
134 |
static VOID cloneouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, int)); |
135 |
static VOID delsub _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); |
136 |
static VOID deltraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); |
137 |
static VOID dupnfa _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, struct state *)); |
138 |
static VOID duptraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); |
139 |
static VOID cleartraverse _ANSI_ARGS_((struct nfa *, struct state *)); |
140 |
static VOID specialcolors _ANSI_ARGS_((struct nfa *)); |
141 |
static long optimize _ANSI_ARGS_((struct nfa *, FILE *)); |
142 |
static VOID pullback _ANSI_ARGS_((struct nfa *, FILE *)); |
143 |
static int pull _ANSI_ARGS_((struct nfa *, struct arc *)); |
144 |
static VOID pushfwd _ANSI_ARGS_((struct nfa *, FILE *)); |
145 |
static int push _ANSI_ARGS_((struct nfa *, struct arc *)); |
146 |
#define INCOMPATIBLE 1 /* destroys arc */ |
147 |
#define SATISFIED 2 /* constraint satisfied */ |
148 |
#define COMPATIBLE 3 /* compatible but not satisfied yet */ |
149 |
static int combine _ANSI_ARGS_((struct arc *, struct arc *)); |
150 |
static VOID fixempties _ANSI_ARGS_((struct nfa *, FILE *)); |
151 |
static int unempty _ANSI_ARGS_((struct nfa *, struct arc *)); |
152 |
static VOID cleanup _ANSI_ARGS_((struct nfa *)); |
153 |
static VOID markreachable _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *)); |
154 |
static VOID markcanreach _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *)); |
155 |
static long analyze _ANSI_ARGS_((struct nfa *)); |
156 |
static VOID compact _ANSI_ARGS_((struct nfa *, struct cnfa *)); |
157 |
static VOID carcsort _ANSI_ARGS_((struct carc *, struct carc *)); |
158 |
static VOID freecnfa _ANSI_ARGS_((struct cnfa *)); |
159 |
static VOID dumpnfa _ANSI_ARGS_((struct nfa *, FILE *)); |
160 |
#ifdef REG_DEBUG |
161 |
static VOID dumpstate _ANSI_ARGS_((struct state *, FILE *)); |
162 |
static VOID dumparcs _ANSI_ARGS_((struct state *, FILE *)); |
163 |
static int dumprarcs _ANSI_ARGS_((struct arc *, struct state *, FILE *, int)); |
164 |
static VOID dumparc _ANSI_ARGS_((struct arc *, struct state *, FILE *)); |
165 |
#endif |
166 |
static VOID dumpcnfa _ANSI_ARGS_((struct cnfa *, FILE *)); |
167 |
#ifdef REG_DEBUG |
168 |
static VOID dumpcstate _ANSI_ARGS_((int, struct carc *, struct cnfa *, FILE *)); |
169 |
#endif |
170 |
/* === regc_cvec.c === */ |
171 |
static struct cvec *newcvec _ANSI_ARGS_((int, int, int)); |
172 |
static struct cvec *clearcvec _ANSI_ARGS_((struct cvec *)); |
173 |
static VOID addchr _ANSI_ARGS_((struct cvec *, pchr)); |
174 |
static VOID addrange _ANSI_ARGS_((struct cvec *, pchr, pchr)); |
175 |
static VOID addmcce _ANSI_ARGS_((struct cvec *, chr *, chr *)); |
176 |
static int haschr _ANSI_ARGS_((struct cvec *, pchr)); |
177 |
static struct cvec *getcvec _ANSI_ARGS_((struct vars *, int, int, int)); |
178 |
static VOID freecvec _ANSI_ARGS_((struct cvec *)); |
179 |
/* === regc_locale.c === */ |
180 |
static int nmcces _ANSI_ARGS_((struct vars *)); |
181 |
static int nleaders _ANSI_ARGS_((struct vars *)); |
182 |
static struct cvec *allmcces _ANSI_ARGS_((struct vars *, struct cvec *)); |
183 |
static celt element _ANSI_ARGS_((struct vars *, chr *, chr *)); |
184 |
static struct cvec *range _ANSI_ARGS_((struct vars *, celt, celt, int)); |
185 |
static int before _ANSI_ARGS_((celt, celt)); |
186 |
static struct cvec *eclass _ANSI_ARGS_((struct vars *, celt, int)); |
187 |
static struct cvec *cclass _ANSI_ARGS_((struct vars *, chr *, chr *, int)); |
188 |
static struct cvec *allcases _ANSI_ARGS_((struct vars *, pchr)); |
189 |
static int cmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t)); |
190 |
static int casecmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t)); |
191 |
/* automatically gathered by fwd; do not hand-edit */ |
192 |
/* =====^!^===== end forwards =====^!^===== */ |
193 |
|
194 |
|
195 |
|
196 |
/* internal variables, bundled for easy passing around */ |
197 |
struct vars { |
198 |
regex_t *re; |
199 |
chr *now; /* scan pointer into string */ |
200 |
chr *stop; /* end of string */ |
201 |
chr *savenow; /* saved now and stop for "subroutine call" */ |
202 |
chr *savestop; |
203 |
int err; /* error code (0 if none) */ |
204 |
int cflags; /* copy of compile flags */ |
205 |
int lasttype; /* type of previous token */ |
206 |
int nexttype; /* type of next token */ |
207 |
chr nextvalue; /* value (if any) of next token */ |
208 |
int lexcon; /* lexical context type (see lex.c) */ |
209 |
int nsubexp; /* subexpression count */ |
210 |
struct subre **subs; /* subRE pointer vector */ |
211 |
size_t nsubs; /* length of vector */ |
212 |
struct subre *sub10[10]; /* initial vector, enough for most */ |
213 |
struct nfa *nfa; /* the NFA */ |
214 |
struct colormap *cm; /* character color map */ |
215 |
color nlcolor; /* color of newline */ |
216 |
struct state *wordchrs; /* state in nfa holding word-char outarcs */ |
217 |
struct subre *tree; /* subexpression tree */ |
218 |
struct subre *treechain; /* all tree nodes allocated */ |
219 |
struct subre *treefree; /* any free tree nodes */ |
220 |
int ntree; /* number of tree nodes */ |
221 |
struct cvec *cv; /* interface cvec */ |
222 |
struct cvec *cv2; /* utility cvec */ |
223 |
struct cvec *mcces; /* collating-element information */ |
224 |
# define ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c))) |
225 |
struct state *mccepbegin; /* in nfa, start of MCCE prototypes */ |
226 |
struct state *mccepend; /* in nfa, end of MCCE prototypes */ |
227 |
struct subre *lacons; /* lookahead-constraint vector */ |
228 |
int nlacons; /* size of lacons */ |
229 |
}; |
230 |
|
231 |
/* parsing macros; most know that `v' is the struct vars pointer */ |
232 |
#define NEXT() (next(v)) /* advance by one token */ |
233 |
#define SEE(t) (v->nexttype == (t)) /* is next token this? */ |
234 |
#define EAT(t) (SEE(t) && next(v)) /* if next is this, swallow it */ |
235 |
#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ |
236 |
#define ISERR() VISERR(v) |
237 |
#define VERR(vv,e) ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err :\ |
238 |
((vv)->err = (e))) |
239 |
#define ERR(e) VERR(v, e) /* record an error */ |
240 |
#define NOERR() {if (ISERR()) return;} /* if error seen, return */ |
241 |
#define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */ |
242 |
#define NOERRZ() {if (ISERR()) return 0;} /* NOERR with retval */ |
243 |
#define INSIST(c, e) ((c) ? 0 : ERR(e)) /* if condition false, error */ |
244 |
#define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */ |
245 |
#define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y) |
246 |
|
247 |
/* token type codes, some also used as NFA arc types */ |
248 |
#define EMPTY 'n' /* no token present */ |
249 |
#define EOS 'e' /* end of string */ |
250 |
#define PLAIN 'p' /* ordinary character */ |
251 |
#define DIGIT 'd' /* digit (in bound) */ |
252 |
#define BACKREF 'b' /* back reference */ |
253 |
#define COLLEL 'I' /* start of [. */ |
254 |
#define ECLASS 'E' /* start of [= */ |
255 |
#define CCLASS 'C' /* start of [: */ |
256 |
#define END 'X' /* end of [. [= [: */ |
257 |
#define RANGE 'R' /* - within [] which might be range delim. */ |
258 |
#define LACON 'L' /* lookahead constraint subRE */ |
259 |
#define AHEAD 'a' /* color-lookahead arc */ |
260 |
#define BEHIND 'r' /* color-lookbehind arc */ |
261 |
#define WBDRY 'w' /* word boundary constraint */ |
262 |
#define NWBDRY 'W' /* non-word-boundary constraint */ |
263 |
#define SBEGIN 'A' /* beginning of string (even if not BOL) */ |
264 |
#define SEND 'Z' /* end of string (even if not EOL) */ |
265 |
#define PREFER 'P' /* length preference */ |
266 |
|
267 |
/* is an arc colored, and hence on a color chain? */ |
268 |
#define COLORED(a) ((a)->type == PLAIN || (a)->type == AHEAD || \ |
269 |
(a)->type == BEHIND) |
270 |
|
271 |
|
272 |
|
273 |
/* static function list */ |
274 |
static struct fns functions = { |
275 |
rfree, /* regfree insides */ |
276 |
}; |
277 |
|
278 |
|
279 |
|
280 |
/* |
281 |
- compile - compile regular expression |
282 |
^ int compile(regex_t *, CONST chr *, size_t, int); |
283 |
*/ |
284 |
int |
285 |
compile(re, string, len, flags) |
286 |
regex_t *re; |
287 |
CONST chr *string; |
288 |
size_t len; |
289 |
int flags; |
290 |
{ |
291 |
struct vars var; |
292 |
struct vars *v = &var; |
293 |
struct guts *g; |
294 |
int i; |
295 |
size_t j; |
296 |
FILE *debug = (flags®_PROGRESS) ? stdout : (FILE *)NULL; |
297 |
# define CNOERR() { if (ISERR()) return freev(v, v->err); } |
298 |
|
299 |
/* sanity checks */ |
300 |
|
301 |
if (re == NULL || string == NULL) |
302 |
return REG_INVARG; |
303 |
if ((flags®_QUOTE) && |
304 |
(flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))) |
305 |
return REG_INVARG; |
306 |
if (!(flags®_EXTENDED) && (flags®_ADVF)) |
307 |
return REG_INVARG; |
308 |
|
309 |
/* initial setup (after which freev() is callable) */ |
310 |
v->re = re; |
311 |
v->now = (chr *)string; |
312 |
v->stop = v->now + len; |
313 |
v->savenow = v->savestop = NULL; |
314 |
v->err = 0; |
315 |
v->cflags = flags; |
316 |
v->nsubexp = 0; |
317 |
v->subs = v->sub10; |
318 |
v->nsubs = 10; |
319 |
for (j = 0; j < v->nsubs; j++) |
320 |
v->subs[j] = NULL; |
321 |
v->nfa = NULL; |
322 |
v->cm = NULL; |
323 |
v->nlcolor = COLORLESS; |
324 |
v->wordchrs = NULL; |
325 |
v->tree = NULL; |
326 |
v->treechain = NULL; |
327 |
v->treefree = NULL; |
328 |
v->cv = NULL; |
329 |
v->cv2 = NULL; |
330 |
v->mcces = NULL; |
331 |
v->lacons = NULL; |
332 |
v->nlacons = 0; |
333 |
re->re_magic = REMAGIC; |
334 |
re->re_info = 0; /* bits get set during parse */ |
335 |
re->re_csize = sizeof(chr); |
336 |
re->re_guts = NULL; |
337 |
re->re_fns = VS(&functions); |
338 |
|
339 |
/* more complex setup, malloced things */ |
340 |
re->re_guts = VS(MALLOC(sizeof(struct guts))); |
341 |
if (re->re_guts == NULL) |
342 |
return freev(v, REG_ESPACE); |
343 |
g = (struct guts *)re->re_guts; |
344 |
g->tree = NULL; |
345 |
initcm(v, &g->cmap); |
346 |
v->cm = &g->cmap; |
347 |
g->lacons = NULL; |
348 |
g->nlacons = 0; |
349 |
ZAPCNFA(g->search); |
350 |
v->nfa = newnfa(v, v->cm, (struct nfa *)NULL); |
351 |
CNOERR(); |
352 |
v->cv = newcvec(100, 20, 10); |
353 |
if (v->cv == NULL) |
354 |
return freev(v, REG_ESPACE); |
355 |
i = nmcces(v); |
356 |
if (i > 0) { |
357 |
v->mcces = newcvec(nleaders(v), 0, i); |
358 |
CNOERR(); |
359 |
v->mcces = allmcces(v, v->mcces); |
360 |
leaders(v, v->mcces); |
361 |
addmcce(v->mcces, (chr *)NULL, (chr *)NULL); /* dummy */ |
362 |
} |
363 |
CNOERR(); |
364 |
|
365 |
/* parsing */ |
366 |
lexstart(v); /* also handles prefixes */ |
367 |
if ((v->cflags®_NLSTOP) || (v->cflags®_NLANCH)) { |
368 |
/* assign newline a unique color */ |
369 |
v->nlcolor = subcolor(v->cm, newline()); |
370 |
okcolors(v->nfa, v->cm); |
371 |
} |
372 |
CNOERR(); |
373 |
v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final); |
374 |
assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */ |
375 |
CNOERR(); |
376 |
assert(v->tree != NULL); |
377 |
|
378 |
/* finish setup of nfa and its subre tree */ |
379 |
specialcolors(v->nfa); |
380 |
CNOERR(); |
381 |
if (debug != NULL) { |
382 |
fprintf(debug, "\n\n\n========= RAW ==========\n"); |
383 |
dumpnfa(v->nfa, debug); |
384 |
dumpst(v->tree, debug, 1); |
385 |
} |
386 |
optst(v, v->tree); |
387 |
v->ntree = numst(v->tree, 1); |
388 |
markst(v->tree); |
389 |
cleanst(v); |
390 |
if (debug != NULL) { |
391 |
fprintf(debug, "\n\n\n========= TREE FIXED ==========\n"); |
392 |
dumpst(v->tree, debug, 1); |
393 |
} |
394 |
|
395 |
/* build compacted NFAs for tree and lacons */ |
396 |
re->re_info |= nfatree(v, v->tree, debug); |
397 |
CNOERR(); |
398 |
assert(v->nlacons == 0 || v->lacons != NULL); |
399 |
for (i = 1; i < v->nlacons; i++) { |
400 |
if (debug != NULL) |
401 |
fprintf(debug, "\n\n\n========= LA%d ==========\n", i); |
402 |
nfanode(v, &v->lacons[i], debug); |
403 |
} |
404 |
CNOERR(); |
405 |
if (v->tree->flags&SHORTER) |
406 |
NOTE(REG_USHORTEST); |
407 |
|
408 |
/* build compacted NFAs for tree, lacons, fast search */ |
409 |
if (debug != NULL) |
410 |
fprintf(debug, "\n\n\n========= SEARCH ==========\n"); |
411 |
/* can sacrifice main NFA now, so use it as work area */ |
412 |
(DISCARD)optimize(v->nfa, debug); |
413 |
CNOERR(); |
414 |
makesearch(v, v->nfa); |
415 |
CNOERR(); |
416 |
compact(v->nfa, &g->search); |
417 |
CNOERR(); |
418 |
|
419 |
/* looks okay, package it up */ |
420 |
re->re_nsub = v->nsubexp; |
421 |
v->re = NULL; /* freev no longer frees re */ |
422 |
g->magic = GUTSMAGIC; |
423 |
g->cflags = v->cflags; |
424 |
g->info = re->re_info; |
425 |
g->nsub = re->re_nsub; |
426 |
g->tree = v->tree; |
427 |
v->tree = NULL; |
428 |
g->ntree = v->ntree; |
429 |
g->compare = (v->cflags®_ICASE) ? casecmp : cmp; |
430 |
g->lacons = v->lacons; |
431 |
v->lacons = NULL; |
432 |
g->nlacons = v->nlacons; |
433 |
|
434 |
if (flags®_DUMP) |
435 |
dump(re, stdout); |
436 |
|
437 |
assert(v->err == 0); |
438 |
return freev(v, 0); |
439 |
} |
440 |
|
441 |
/* |
442 |
- moresubs - enlarge subRE vector |
443 |
^ static VOID moresubs(struct vars *, int); |
444 |
*/ |
445 |
static VOID |
446 |
moresubs(v, wanted) |
447 |
struct vars *v; |
448 |
int wanted; /* want enough room for this one */ |
449 |
{ |
450 |
struct subre **p; |
451 |
size_t n; |
452 |
|
453 |
assert(wanted > 0 && (size_t)wanted >= v->nsubs); |
454 |
n = (size_t)wanted * 3 / 2 + 1; |
455 |
if (v->subs == v->sub10) { |
456 |
p = (struct subre **)MALLOC(n * sizeof(struct subre *)); |
457 |
if (p != NULL) |
458 |
memcpy(VS(p), VS(v->subs), |
459 |
v->nsubs * sizeof(struct subre *)); |
460 |
} else |
461 |
p = (struct subre **)REALLOC(v->subs, n*sizeof(struct subre *)); |
462 |
if (p == NULL) { |
463 |
ERR(REG_ESPACE); |
464 |
return; |
465 |
} |
466 |
v->subs = p; |
467 |
for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++) |
468 |
*p = NULL; |
469 |
assert(v->nsubs == n); |
470 |
assert((size_t)wanted < v->nsubs); |
471 |
} |
472 |
|
473 |
/* |
474 |
- freev - free vars struct's substructures where necessary |
475 |
* Optionally does error-number setting, and always returns error code |
476 |
* (if any), to make error-handling code terser. |
477 |
^ static int freev(struct vars *, int); |
478 |
*/ |
479 |
static int |
480 |
freev(v, err) |
481 |
struct vars *v; |
482 |
int err; |
483 |
{ |
484 |
if (v->re != NULL) |
485 |
rfree(v->re); |
486 |
if (v->subs != v->sub10) |
487 |
FREE(v->subs); |
488 |
if (v->nfa != NULL) |
489 |
freenfa(v->nfa); |
490 |
if (v->tree != NULL) |
491 |
freesubre(v, v->tree); |
492 |
if (v->treechain != NULL) |
493 |
cleanst(v); |
494 |
if (v->cv != NULL) |
495 |
freecvec(v->cv); |
496 |
if (v->cv2 != NULL) |
497 |
freecvec(v->cv2); |
498 |
if (v->mcces != NULL) |
499 |
freecvec(v->mcces); |
500 |
if (v->lacons != NULL) |
501 |
freelacons(v->lacons, v->nlacons); |
502 |
ERR(err); /* nop if err==0 */ |
503 |
|
504 |
return v->err; |
505 |
} |
506 |
|
507 |
/* |
508 |
- makesearch - turn an NFA into a search NFA (implicit prepend of .*?) |
509 |
* NFA must have been optimize()d already. |
510 |
^ static VOID makesearch(struct vars *, struct nfa *); |
511 |
*/ |
512 |
static VOID |
513 |
makesearch(v, nfa) |
514 |
struct vars *v; |
515 |
struct nfa *nfa; |
516 |
{ |
517 |
struct arc *a; |
518 |
struct arc *b; |
519 |
struct state *pre = nfa->pre; |
520 |
struct state *s; |
521 |
struct state *s2; |
522 |
struct state *slist; |
523 |
|
524 |
/* no loops are needed if it's anchored */ |
525 |
for (a = pre->outs; a != NULL; a = a->outchain) { |
526 |
assert(a->type == PLAIN); |
527 |
if (a->co != nfa->bos[0] && a->co != nfa->bos[1]) |
528 |
break; |
529 |
} |
530 |
if (a != NULL) { |
531 |
/* add implicit .* in front */ |
532 |
rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre); |
533 |
|
534 |
/* and ^* and \A* too -- not always necessary, but harmless */ |
535 |
newarc(nfa, PLAIN, nfa->bos[0], pre, pre); |
536 |
newarc(nfa, PLAIN, nfa->bos[1], pre, pre); |
537 |
} |
538 |
|
539 |
/* |
540 |
* Now here's the subtle part. Because many REs have no lookback |
541 |
* constraints, often knowing when you were in the pre state tells |
542 |
* you little; it's the next state(s) that are informative. But |
543 |
* some of them may have other inarcs, i.e. it may be possible to |
544 |
* make actual progress and then return to one of them. We must |
545 |
* de-optimize such cases, splitting each such state into progress |
546 |
* and no-progress states. |
547 |
*/ |
548 |
|
549 |
/* first, make a list of the states */ |
550 |
slist = NULL; |
551 |
for (a = pre->outs; a != NULL; a = a->outchain) { |
552 |
s = a->to; |
553 |
for (b = s->ins; b != NULL; b = b->inchain) |
554 |
if (b->from != pre) |
555 |
break; |
556 |
if (b != NULL) { /* must be split */ |
557 |
s->tmp = slist; |
558 |
slist = s; |
559 |
} |
560 |
} |
561 |
|
562 |
/* do the splits */ |
563 |
for (s = slist; s != NULL; s = s2) { |
564 |
s2 = newstate(nfa); |
565 |
copyouts(nfa, s, s2); |
566 |
for (a = s->ins; a != NULL; a = b) { |
567 |
b = a->inchain; |
568 |
if (a->from != pre) { |
569 |
cparc(nfa, a, a->from, s2); |
570 |
freearc(nfa, a); |
571 |
} |
572 |
} |
573 |
s2 = s->tmp; |
574 |
s->tmp = NULL; /* clean up while we're at it */ |
575 |
} |
576 |
} |
577 |
|
578 |
/* |
579 |
- parse - parse an RE |
580 |
* This is actually just the top level, which parses a bunch of branches |
581 |
* tied together with '|'. They appear in the tree as the left children |
582 |
* of a chain of '|' subres. |
583 |
^ static struct subre *parse(struct vars *, int, int, struct state *, |
584 |
^ struct state *); |
585 |
*/ |
586 |
static struct subre * |
587 |
parse(v, stopper, type, init, final) |
588 |
struct vars *v; |
589 |
int stopper; /* EOS or ')' */ |
590 |
int type; /* LACON (lookahead subRE) or PLAIN */ |
591 |
struct state *init; /* initial state */ |
592 |
struct state *final; /* final state */ |
593 |
{ |
594 |
struct state *left; /* scaffolding for branch */ |
595 |
struct state *right; |
596 |
struct subre *branches; /* top level */ |
597 |
struct subre *branch; /* current branch */ |
598 |
struct subre *t; /* temporary */ |
599 |
int firstbranch; /* is this the first branch? */ |
600 |
|
601 |
assert(stopper == ')' || stopper == EOS); |
602 |
|
603 |
branches = subre(v, '|', LONGER, init, final); |
604 |
NOERRN(); |
605 |
branch = branches; |
606 |
firstbranch = 1; |
607 |
do { /* a branch */ |
608 |
if (!firstbranch) { |
609 |
/* need a place to hang it */ |
610 |
branch->right = subre(v, '|', LONGER, init, final); |
611 |
NOERRN(); |
612 |
branch = branch->right; |
613 |
} |
614 |
firstbranch = 0; |
615 |
left = newstate(v->nfa); |
616 |
right = newstate(v->nfa); |
617 |
NOERRN(); |
618 |
EMPTYARC(init, left); |
619 |
EMPTYARC(right, final); |
620 |
NOERRN(); |
621 |
branch->left = parsebranch(v, stopper, type, left, right, 0); |
622 |
NOERRN(); |
623 |
branch->flags |= UP(branch->flags | branch->left->flags); |
624 |
if ((branch->flags &~ branches->flags) != 0) /* new flags */ |
625 |
for (t = branches; t != branch; t = t->right) |
626 |
t->flags |= branch->flags; |
627 |
} while (EAT('|')); |
628 |
assert(SEE(stopper) || SEE(EOS)); |
629 |
|
630 |
if (!SEE(stopper)) { |
631 |
assert(stopper == ')' && SEE(EOS)); |
632 |
ERR(REG_EPAREN); |
633 |
} |
634 |
|
635 |
/* optimize out simple cases */ |
636 |
if (branch == branches) { /* only one branch */ |
637 |
assert(branch->right == NULL); |
638 |
t = branch->left; |
639 |
branch->left = NULL; |
640 |
freesubre(v, branches); |
641 |
branches = t; |
642 |
} else if (!MESSY(branches->flags)) { /* no interesting innards */ |
643 |
freesubre(v, branches->left); |
644 |
branches->left = NULL; |
645 |
freesubre(v, branches->right); |
646 |
branches->right = NULL; |
647 |
branches->op = '='; |
648 |
} |
649 |
|
650 |
return branches; |
651 |
} |
652 |
|
653 |
/* |
654 |
- parsebranch - parse one branch of an RE |
655 |
* This mostly manages concatenation, working closely with parseqatom(). |
656 |
* Concatenated things are bundled up as much as possible, with separate |
657 |
* ',' nodes introduced only when necessary due to substructure. |
658 |
^ static struct subre *parsebranch(struct vars *, int, int, struct state *, |
659 |
^ struct state *, int); |
660 |
*/ |
661 |
static struct subre * |
662 |
parsebranch(v, stopper, type, left, right, partial) |
663 |
struct vars *v; |
664 |
int stopper; /* EOS or ')' */ |
665 |
int type; /* LACON (lookahead subRE) or PLAIN */ |
666 |
struct state *left; /* leftmost state */ |
667 |
struct state *right; /* rightmost state */ |
668 |
int partial; /* is this only part of a branch? */ |
669 |
{ |
670 |
struct state *lp; /* left end of current construct */ |
671 |
int seencontent; /* is there anything in this branch yet? */ |
672 |
struct subre *t; |
673 |
|
674 |
lp = left; |
675 |
seencontent = 0; |
676 |
t = subre(v, '=', 0, left, right); /* op '=' is tentative */ |
677 |
NOERRN(); |
678 |
while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) { |
679 |
if (seencontent) { /* implicit concat operator */ |
680 |
lp = newstate(v->nfa); |
681 |
NOERRN(); |
682 |
moveins(v->nfa, right, lp); |
683 |
} |
684 |
seencontent = 1; |
685 |
|
686 |
/* NB, recursion in parseqatom() may swallow rest of branch */ |
687 |
parseqatom(v, stopper, type, lp, right, t); |
688 |
} |
689 |
|
690 |
if (!seencontent) { /* empty branch */ |
691 |
if (!partial) |
692 |
NOTE(REG_UUNSPEC); |
693 |
assert(lp == left); |
694 |
EMPTYARC(left, right); |
695 |
} |
696 |
|
697 |
return t; |
698 |
} |
699 |
|
700 |
/* |
701 |
- parseqatom - parse one quantified atom or constraint of an RE |
702 |
* The bookkeeping near the end cooperates very closely with parsebranch(); |
703 |
* in particular, it contains a recursion that can involve parsing the rest |
704 |
* of the branch, making this function's name somewhat inaccurate. |
705 |
^ static VOID parseqatom(struct vars *, int, int, struct state *, |
706 |
^ struct state *, struct subre *); |
707 |
*/ |
708 |
static VOID |
709 |
parseqatom(v, stopper, type, lp, rp, top) |
710 |
struct vars *v; |
711 |
int stopper; /* EOS or ')' */ |
712 |
int type; /* LACON (lookahead subRE) or PLAIN */ |
713 |
struct state *lp; /* left state to hang it on */ |
714 |
struct state *rp; /* right state to hang it on */ |
715 |
struct subre *top; /* subtree top */ |
716 |
{ |
717 |
struct state *s; /* temporaries for new states */ |
718 |
struct state *s2; |
719 |
# define ARCV(t, val) newarc(v->nfa, t, val, lp, rp) |
720 |
int m, n; |
721 |
struct subre *atom; /* atom's subtree */ |
722 |
struct subre *t; |
723 |
int cap; /* capturing parens? */ |
724 |
int pos; /* positive lookahead? */ |
725 |
int subno; /* capturing-parens or backref number */ |
726 |
int atomtype; |
727 |
int qprefer; /* quantifier short/long preference */ |
728 |
int f; |
729 |
struct subre **atomp; /* where the pointer to atom is */ |
730 |
|
731 |
/* initial bookkeeping */ |
732 |
atom = NULL; |
733 |
assert(lp->nouts == 0); /* must string new code */ |
734 |
assert(rp->nins == 0); /* between lp and rp */ |
735 |
subno = 0; /* just to shut lint up */ |
736 |
|
737 |
/* an atom or constraint... */ |
738 |
atomtype = v->nexttype; |
739 |
switch (atomtype) { |
740 |
/* first, constraints, which end by returning */ |
741 |
case '^': |
742 |
ARCV('^', 1); |
743 |
if (v->cflags®_NLANCH) |
744 |
ARCV(BEHIND, v->nlcolor); |
745 |
NEXT(); |
746 |
return; |
747 |
break; |
748 |
case '$': |
749 |
ARCV('$', 1); |
750 |
if (v->cflags®_NLANCH) |
751 |
ARCV(AHEAD, v->nlcolor); |
752 |
NEXT(); |
753 |
return; |
754 |
break; |
755 |
case SBEGIN: |
756 |
ARCV('^', 1); /* BOL */ |
757 |
ARCV('^', 0); /* or BOS */ |
758 |
NEXT(); |
759 |
return; |
760 |
break; |
761 |
case SEND: |
762 |
ARCV('$', 1); /* EOL */ |
763 |
ARCV('$', 0); /* or EOS */ |
764 |
NEXT(); |
765 |
return; |
766 |
break; |
767 |
case '<': |
768 |
wordchrs(v); /* does NEXT() */ |
769 |
s = newstate(v->nfa); |
770 |
NOERR(); |
771 |
nonword(v, BEHIND, lp, s); |
772 |
word(v, AHEAD, s, rp); |
773 |
return; |
774 |
break; |
775 |
case '>': |
776 |
wordchrs(v); /* does NEXT() */ |
777 |
s = newstate(v->nfa); |
778 |
NOERR(); |
779 |
word(v, BEHIND, lp, s); |
780 |
nonword(v, AHEAD, s, rp); |
781 |
return; |
782 |
break; |
783 |
case WBDRY: |
784 |
wordchrs(v); /* does NEXT() */ |
785 |
s = newstate(v->nfa); |
786 |
NOERR(); |
787 |
nonword(v, BEHIND, lp, s); |
788 |
word(v, AHEAD, s, rp); |
789 |
s = newstate(v->nfa); |
790 |
NOERR(); |
791 |
word(v, BEHIND, lp, s); |
792 |
nonword(v, AHEAD, s, rp); |
793 |
return; |
794 |
break; |
795 |
case NWBDRY: |
796 |
wordchrs(v); /* does NEXT() */ |
797 |
s = newstate(v->nfa); |
798 |
NOERR(); |
799 |
word(v, BEHIND, lp, s); |
800 |
word(v, AHEAD, s, rp); |
801 |
s = newstate(v->nfa); |
802 |
NOERR(); |
803 |
nonword(v, BEHIND, lp, s); |
804 |
nonword(v, AHEAD, s, rp); |
805 |
return; |
806 |
break; |
807 |
case LACON: /* lookahead constraint */ |
808 |
pos = v->nextvalue; |
809 |
NEXT(); |
810 |
s = newstate(v->nfa); |
811 |
s2 = newstate(v->nfa); |
812 |
NOERR(); |
813 |
t = parse(v, ')', LACON, s, s2); |
814 |
freesubre(v, t); /* internal structure irrelevant */ |
815 |
assert(SEE(')') || ISERR()); |
816 |
NEXT(); |
817 |
n = newlacon(v, s, s2, pos); |
818 |
NOERR(); |
819 |
ARCV(LACON, n); |
820 |
return; |
821 |
break; |
822 |
/* then errors, to get them out of the way */ |
823 |
case '*': |
824 |
case '+': |
825 |
case '?': |
826 |
case '{': |
827 |
ERR(REG_BADRPT); |
828 |
return; |
829 |
break; |
830 |
default: |
831 |
ERR(REG_ASSERT); |
832 |
return; |
833 |
break; |
834 |
/* then plain characters, and minor variants on that theme */ |
835 |
case ')': /* unbalanced paren */ |
836 |
if ((v->cflags®_ADVANCED) != REG_EXTENDED) { |
837 |
ERR(REG_EPAREN); |
838 |
return; |
839 |
} |
840 |
/* legal in EREs due to specification botch */ |
841 |
NOTE(REG_UPBOTCH); |
842 |
/* fallthrough into case PLAIN */ |
843 |
case PLAIN: |
844 |
onechr(v, v->nextvalue, lp, rp); |
845 |
okcolors(v->nfa, v->cm); |
846 |
NOERR(); |
847 |
NEXT(); |
848 |
break; |
849 |
case '[': |
850 |
if (v->nextvalue == 1) |
851 |
bracket(v, lp, rp); |
852 |
else |
853 |
cbracket(v, lp, rp); |
854 |
assert(SEE(']') || ISERR()); |
855 |
NEXT(); |
856 |
break; |
857 |
case '.': |
858 |
rainbow(v->nfa, v->cm, PLAIN, |
859 |
(v->cflags®_NLSTOP) ? v->nlcolor : COLORLESS, |
860 |
lp, rp); |
861 |
NEXT(); |
862 |
break; |
863 |
/* and finally the ugly stuff */ |
864 |
case '(': /* value flags as capturing or non */ |
865 |
cap = (type == LACON) ? 0 : v->nextvalue; |
866 |
if (cap) { |
867 |
v->nsubexp++; |
868 |
subno = v->nsubexp; |
869 |
if ((size_t)subno >= v->nsubs) |
870 |
moresubs(v, subno); |
871 |
assert((size_t)subno < v->nsubs); |
872 |
} else |
873 |
atomtype = PLAIN; /* something that's not '(' */ |
874 |
NEXT(); |
875 |
/* need new endpoints because tree will contain pointers */ |
876 |
s = newstate(v->nfa); |
877 |
s2 = newstate(v->nfa); |
878 |
NOERR(); |
879 |
EMPTYARC(lp, s); |
880 |
EMPTYARC(s2, rp); |
881 |
NOERR(); |
882 |
atom = parse(v, ')', PLAIN, s, s2); |
883 |
assert(SEE(')') || ISERR()); |
884 |
NEXT(); |
885 |
NOERR(); |
886 |
if (cap) { |
887 |
v->subs[subno] = atom; |
888 |
t = subre(v, '(', atom->flags|CAP, lp, rp); |
889 |
NOERR(); |
890 |
t->subno = subno; |
891 |
t->left = atom; |
892 |
atom = t; |
893 |
} |
894 |
/* postpone everything else pending possible {0} */ |
895 |
break; |
896 |
case BACKREF: /* the Feature From The Black Lagoon */ |
897 |
INSIST(type != LACON, REG_ESUBREG); |
898 |
INSIST(v->nextvalue < v->nsubs, REG_ESUBREG); |
899 |
INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG); |
900 |
NOERR(); |
901 |
assert(v->nextvalue > 0); |
902 |
atom = subre(v, 'b', BACKR, lp, rp); |
903 |
subno = v->nextvalue; |
904 |
atom->subno = subno; |
905 |
EMPTYARC(lp, rp); /* temporarily, so there's something */ |
906 |
NEXT(); |
907 |
break; |
908 |
} |
909 |
|
910 |
/* ...and an atom may be followed by a quantifier */ |
911 |
switch (v->nexttype) { |
912 |
case '*': |
913 |
m = 0; |
914 |
n = INFINITY; |
915 |
qprefer = (v->nextvalue) ? LONGER : SHORTER; |
916 |
NEXT(); |
917 |
break; |
918 |
case '+': |
919 |
m = 1; |
920 |
n = INFINITY; |
921 |
qprefer = (v->nextvalue) ? LONGER : SHORTER; |
922 |
NEXT(); |
923 |
break; |
924 |
case '?': |
925 |
m = 0; |
926 |
n = 1; |
927 |
qprefer = (v->nextvalue) ? LONGER : SHORTER; |
928 |
NEXT(); |
929 |
break; |
930 |
case '{': |
931 |
NEXT(); |
932 |
m = scannum(v); |
933 |
if (EAT(',')) { |
934 |
if (SEE(DIGIT)) |
935 |
n = scannum(v); |
936 |
else |
937 |
n = INFINITY; |
938 |
if (m > n) { |
939 |
ERR(REG_BADBR); |
940 |
return; |
941 |
} |
942 |
/* {m,n} exercises preference, even if it's {m,m} */ |
943 |
qprefer = (v->nextvalue) ? LONGER : SHORTER; |
944 |
} else { |
945 |
n = m; |
946 |
/* {m} passes operand's preference through */ |
947 |
qprefer = 0; |
948 |
} |
949 |
if (!SEE('}')) { /* catches errors too */ |
950 |
ERR(REG_BADBR); |
951 |
return; |
952 |
} |
953 |
NEXT(); |
954 |
break; |
955 |
default: /* no quantifier */ |
956 |
m = n = 1; |
957 |
qprefer = 0; |
958 |
break; |
959 |
} |
960 |
|
961 |
/* annoying special case: {0} or {0,0} cancels everything */ |
962 |
if (m == 0 && n == 0) { |
963 |
if (atom != NULL) |
964 |
freesubre(v, atom); |
965 |
if (atomtype == '(') |
966 |
v->subs[subno] = NULL; |
967 |
delsub(v->nfa, lp, rp); |
968 |
EMPTYARC(lp, rp); |
969 |
return; |
970 |
} |
971 |
|
972 |
/* if not a messy case, avoid hard part */ |
973 |
assert(!MESSY(top->flags)); |
974 |
f = top->flags | qprefer | ((atom != NULL) ? atom->flags : 0); |
975 |
if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) { |
976 |
if (!(m == 1 && n == 1)) |
977 |
repeat(v, lp, rp, m, n); |
978 |
if (atom != NULL) |
979 |
freesubre(v, atom); |
980 |
top->flags = f; |
981 |
return; |
982 |
} |
983 |
|
984 |
/* |
985 |
* hard part: something messy |
986 |
* That is, capturing parens, back reference, short/long clash, or |
987 |
* an atom with substructure containing one of those. |
988 |
*/ |
989 |
|
990 |
/* now we'll need a subre for the contents even if they're boring */ |
991 |
if (atom == NULL) { |
992 |
atom = subre(v, '=', 0, lp, rp); |
993 |
NOERR(); |
994 |
} |
995 |
|
996 |
/* |
997 |
* prepare a general-purpose state skeleton |
998 |
* |
999 |
* ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp] |
1000 |
* / / |
1001 |
* [lp] ----> [s2] ----bypass--------------------- |
1002 |
* |
1003 |
* where bypass is an empty, and prefix is some repetitions of atom |
1004 |
*/ |
1005 |
s = newstate(v->nfa); /* first, new endpoints for the atom */ |
1006 |
s2 = newstate(v->nfa); |
1007 |
NOERR(); |
1008 |
moveouts(v->nfa, lp, s); |
1009 |
moveins(v->nfa, rp, s2); |
1010 |
NOERR(); |
1011 |
atom->begin = s; |
1012 |
atom->end = s2; |
1013 |
s = newstate(v->nfa); /* and spots for prefix and bypass */ |
1014 |
s2 = newstate(v->nfa); |
1015 |
NOERR(); |
1016 |
EMPTYARC(lp, s); |
1017 |
EMPTYARC(lp, s2); |
1018 |
NOERR(); |
1019 |
|
1020 |
/* break remaining subRE into x{...} and what follows */ |
1021 |
t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp); |
1022 |
t->left = atom; |
1023 |
atomp = &t->left; |
1024 |
/* here we should recurse... but we must postpone that to the end */ |
1025 |
|
1026 |
/* split top into prefix and remaining */ |
1027 |
assert(top->op == '=' && top->left == NULL && top->right == NULL); |
1028 |
top->left = subre(v, '=', top->flags, top->begin, lp); |
1029 |
top->op = '.'; |
1030 |
top->right = t; |
1031 |
|
1032 |
/* if it's a backref, now is the time to replicate the subNFA */ |
1033 |
if (atomtype == BACKREF) { |
1034 |
assert(atom->begin->nouts == 1); /* just the EMPTY */ |
1035 |
delsub(v->nfa, atom->begin, atom->end); |
1036 |
assert(v->subs[subno] != NULL); |
1037 |
/* and here's why the recursion got postponed: it must */ |
1038 |
/* wait until the skeleton is filled in, because it may */ |
1039 |
/* hit a backref that wants to copy the filled-in skeleton */ |
1040 |
dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end, |
1041 |
atom->begin, atom->end); |
1042 |
NOERR(); |
1043 |
} |
1044 |
|
1045 |
/* it's quantifier time; first, turn x{0,...} into x{1,...}|empty */ |
1046 |
if (m == 0) { |
1047 |
EMPTYARC(s2, atom->end); /* the bypass */ |
1048 |
assert(PREF(qprefer) != 0); |
1049 |
f = COMBINE(qprefer, atom->flags); |
1050 |
t = subre(v, '|', f, lp, atom->end); |
1051 |
NOERR(); |
1052 |
t->left = atom; |
1053 |
t->right = subre(v, '|', PREF(f), s2, atom->end); |
1054 |
NOERR(); |
1055 |
t->right->left = subre(v, '=', 0, s2, atom->end); |
1056 |
NOERR(); |
1057 |
*atomp = t; |
1058 |
atomp = &t->left; |
1059 |
m = 1; |
1060 |
} |
1061 |
|
1062 |
/* deal with the rest of the quantifier */ |
1063 |
if (atomtype == BACKREF) { |
1064 |
/* special case: backrefs have internal quantifiers */ |
1065 |
EMPTYARC(s, atom->begin); /* empty prefix */ |
1066 |
/* just stuff everything into atom */ |
1067 |
repeat(v, atom->begin, atom->end, m, n); |
1068 |
atom->min = (short)m; |
1069 |
atom->max = (short)n; |
1070 |
atom->flags |= COMBINE(qprefer, atom->flags); |
1071 |
} else if (m == 1 && n == 1) { |
1072 |
/* no/vacuous quantifier: done */ |
1073 |
EMPTYARC(s, atom->begin); /* empty prefix */ |
1074 |
} else { |
1075 |
/* turn x{m,n} into x{m-1,n-1}x, with capturing */ |
1076 |
/* parens in only second x */ |
1077 |
dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin); |
1078 |
assert(m >= 1 && m != INFINITY && n >= 1); |
1079 |
repeat(v, s, atom->begin, m-1, (n == INFINITY) ? n : n-1); |
1080 |
f = COMBINE(qprefer, atom->flags); |
1081 |
t = subre(v, '.', f, s, atom->end); /* prefix and atom */ |
1082 |
NOERR(); |
1083 |
t->left = subre(v, '=', PREF(f), s, atom->begin); |
1084 |
NOERR(); |
1085 |
t->right = atom; |
1086 |
*atomp = t; |
1087 |
} |
1088 |
|
1089 |
/* and finally, look after that postponed recursion */ |
1090 |
t = top->right; |
1091 |
if (!(SEE('|') || SEE(stopper) || SEE(EOS))) |
1092 |
t->right = parsebranch(v, stopper, type, atom->end, rp, 1); |
1093 |
else { |
1094 |
EMPTYARC(atom->end, rp); |
1095 |
t->right = subre(v, '=', 0, atom->end, rp); |
1096 |
} |
1097 |
assert(SEE('|') || SEE(stopper) || SEE(EOS)); |
1098 |
t->flags |= COMBINE(t->flags, t->right->flags); |
1099 |
top->flags |= COMBINE(top->flags, t->flags); |
1100 |
} |
1101 |
|
1102 |
/* |
1103 |
- nonword - generate arcs for non-word-character ahead or behind |
1104 |
^ static VOID nonword(struct vars *, int, struct state *, struct state *); |
1105 |
*/ |
1106 |
static VOID |
1107 |
nonword(v, dir, lp, rp) |
1108 |
struct vars *v; |
1109 |
int dir; /* AHEAD or BEHIND */ |
1110 |
struct state *lp; |
1111 |
struct state *rp; |
1112 |
{ |
1113 |
int anchor = (dir == AHEAD) ? '$' : '^'; |
1114 |
|
1115 |
assert(dir == AHEAD || dir == BEHIND); |
1116 |
newarc(v->nfa, anchor, 1, lp, rp); |
1117 |
newarc(v->nfa, anchor, 0, lp, rp); |
1118 |
colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp); |
1119 |
/* (no need for special attention to \n) */ |
1120 |
} |
1121 |
|
1122 |
/* |
1123 |
- word - generate arcs for word character ahead or behind |
1124 |
^ static VOID word(struct vars *, int, struct state *, struct state *); |
1125 |
*/ |
1126 |
static VOID |
1127 |
word(v, dir, lp, rp) |
1128 |
struct vars *v; |
1129 |
int dir; /* AHEAD or BEHIND */ |
1130 |
struct state *lp; |
1131 |
struct state *rp; |
1132 |
{ |
1133 |
assert(dir == AHEAD || dir == BEHIND); |
1134 |
cloneouts(v->nfa, v->wordchrs, lp, rp, dir); |
1135 |
/* (no need for special attention to \n) */ |
1136 |
} |
1137 |
|
1138 |
/* |
1139 |
- scannum - scan a number |
1140 |
^ static int scannum(struct vars *); |
1141 |
*/ |
1142 |
static int /* value, <= DUPMAX */ |
1143 |
scannum(v) |
1144 |
struct vars *v; |
1145 |
{ |
1146 |
int n = 0; |
1147 |
|
1148 |
while (SEE(DIGIT) && n < DUPMAX) { |
1149 |
n = n*10 + v->nextvalue; |
1150 |
NEXT(); |
1151 |
} |
1152 |
if (SEE(DIGIT) || n > DUPMAX) { |
1153 |
ERR(REG_BADBR); |
1154 |
return 0; |
1155 |
} |
1156 |
return n; |
1157 |
} |
1158 |
|
1159 |
/* |
1160 |
- repeat - replicate subNFA for quantifiers |
1161 |
* The duplication sequences used here are chosen carefully so that any |
1162 |
* pointers starting out pointing into the subexpression end up pointing into |
1163 |
* the last occurrence. (Note that it may not be strung between the same |
1164 |
* left and right end states, however!) This used to be important for the |
1165 |
* subRE tree, although the important bits are now handled by the in-line |
1166 |
* code in parse(), and when this is called, it doesn't matter any more. |
1167 |
^ static VOID repeat(struct vars *, struct state *, struct state *, int, int); |
1168 |
*/ |
1169 |
static VOID |
1170 |
repeat(v, lp, rp, m, n) |
1171 |
struct vars *v; |
1172 |
struct state *lp; |
1173 |
struct state *rp; |
1174 |
int m; |
1175 |
int n; |
1176 |
{ |
1177 |
# define SOME 2 |
1178 |
# define INF 3 |
1179 |
# define PAIR(x, y) ((x)*4 + (y)) |
1180 |
# define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) ) |
1181 |
CONST int rm = REDUCE(m); |
1182 |
CONST int rn = REDUCE(n); |
1183 |
struct state *s; |
1184 |
struct state *s2; |
1185 |
|
1186 |
switch (PAIR(rm, rn)) { |
1187 |
case PAIR(0, 0): /* empty string */ |
1188 |
delsub(v->nfa, lp, rp); |
1189 |
EMPTYARC(lp, rp); |
1190 |
break; |
1191 |
case PAIR(0, 1): /* do as x| */ |
1192 |
EMPTYARC(lp, rp); |
1193 |
break; |
1194 |
case PAIR(0, SOME): /* do as x{1,n}| */ |
1195 |
repeat(v, lp, rp, 1, n); |
1196 |
NOERR(); |
1197 |
EMPTYARC(lp, rp); |
1198 |
break; |
1199 |
case PAIR(0, INF): /* loop x around */ |
1200 |
s = newstate(v->nfa); |
1201 |
NOERR(); |
1202 |
moveouts(v->nfa, lp, s); |
1203 |
moveins(v->nfa, rp, s); |
1204 |
EMPTYARC(lp, s); |
1205 |
EMPTYARC(s, rp); |
1206 |
break; |
1207 |
case PAIR(1, 1): /* no action required */ |
1208 |
break; |
1209 |
case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}|)x */ |
1210 |
s = newstate(v->nfa); |
1211 |
NOERR(); |
1212 |
moveouts(v->nfa, lp, s); |
1213 |
dupnfa(v->nfa, s, rp, lp, s); |
1214 |
NOERR(); |
1215 |
repeat(v, lp, s, 1, n-1); |
1216 |
NOERR(); |
1217 |
EMPTYARC(lp, s); |
1218 |
break; |
1219 |
case PAIR(1, INF): /* add loopback arc */ |
1220 |
s = newstate(v->nfa); |
1221 |
s2 = newstate(v->nfa); |
1222 |
NOERR(); |
1223 |
moveouts(v->nfa, lp, s); |
1224 |
moveins(v->nfa, rp, s2); |
1225 |
EMPTYARC(lp, s); |
1226 |
EMPTYARC(s2, rp); |
1227 |
EMPTYARC(s2, s); |
1228 |
break; |
1229 |
case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */ |
1230 |
s = newstate(v->nfa); |
1231 |
NOERR(); |
1232 |
moveouts(v->nfa, lp, s); |
1233 |
dupnfa(v->nfa, s, rp, lp, s); |
1234 |
NOERR(); |
1235 |
repeat(v, lp, s, m-1, n-1); |
1236 |
break; |
1237 |
case PAIR(SOME, INF): /* do as x{m-1,}x */ |
1238 |
s = newstate(v->nfa); |
1239 |
NOERR(); |
1240 |
moveouts(v->nfa, lp, s); |
1241 |
dupnfa(v->nfa, s, rp, lp, s); |
1242 |
NOERR(); |
1243 |
repeat(v, lp, s, m-1, n); |
1244 |
break; |
1245 |
default: |
1246 |
ERR(REG_ASSERT); |
1247 |
break; |
1248 |
} |
1249 |
} |
1250 |
|
1251 |
/* |
1252 |
- bracket - handle non-complemented bracket expression |
1253 |
* Also called from cbracket for complemented bracket expressions. |
1254 |
^ static VOID bracket(struct vars *, struct state *, struct state *); |
1255 |
*/ |
1256 |
static VOID |
1257 |
bracket(v, lp, rp) |
1258 |
struct vars *v; |
1259 |
struct state *lp; |
1260 |
struct state *rp; |
1261 |
{ |
1262 |
assert(SEE('[')); |
1263 |
NEXT(); |
1264 |
while (!SEE(']') && !SEE(EOS)) |
1265 |
brackpart(v, lp, rp); |
1266 |
assert(SEE(']') || ISERR()); |
1267 |
okcolors(v->nfa, v->cm); |
1268 |
} |
1269 |
|
1270 |
/* |
1271 |
- cbracket - handle complemented bracket expression |
1272 |
* We do it by calling bracket() with dummy endpoints, and then complementing |
1273 |
* the result. The alternative would be to invoke rainbow(), and then delete |
1274 |
* arcs as the b.e. is seen... but that gets messy. |
1275 |
^ static VOID cbracket(struct vars *, struct state *, struct state *); |
1276 |
*/ |
1277 |
static VOID |
1278 |
cbracket(v, lp, rp) |
1279 |
struct vars *v; |
1280 |
struct state *lp; |
1281 |
struct state *rp; |
1282 |
{ |
1283 |
struct state *left = newstate(v->nfa); |
1284 |
struct state *right = newstate(v->nfa); |
1285 |
struct state *s; |
1286 |
struct arc *a; /* arc from lp */ |
1287 |
struct arc *ba; /* arc from left, from bracket() */ |
1288 |
struct arc *pa; /* MCCE-prototype arc */ |
1289 |
color co; |
1290 |
chr *p; |
1291 |
int i; |
1292 |
|
1293 |
NOERR(); |
1294 |
bracket(v, left, right); |
1295 |
if (v->cflags®_NLSTOP) |
1296 |
newarc(v->nfa, PLAIN, v->nlcolor, left, right); |
1297 |
NOERR(); |
1298 |
|
1299 |
assert(lp->nouts == 0); /* all outarcs will be ours */ |
1300 |
|
1301 |
/* easy part of complementing */ |
1302 |
colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp); |
1303 |
NOERR(); |
1304 |
if (v->mcces == NULL) { /* no MCCEs -- we're done */ |
1305 |
dropstate(v->nfa, left); |
1306 |
assert(right->nins == 0); |
1307 |
freestate(v->nfa, right); |
1308 |
return; |
1309 |
} |
1310 |
|
1311 |
/* but complementing gets messy in the presence of MCCEs... */ |
1312 |
NOTE(REG_ULOCALE); |
1313 |
for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) { |
1314 |
co = GETCOLOR(v->cm, *p); |
1315 |
a = findarc(lp, PLAIN, co); |
1316 |
ba = findarc(left, PLAIN, co); |
1317 |
if (ba == NULL) { |
1318 |
assert(a != NULL); |
1319 |
freearc(v->nfa, a); |
1320 |
} else { |
1321 |
assert(a == NULL); |
1322 |
} |
1323 |
s = newstate(v->nfa); |
1324 |
NOERR(); |
1325 |
newarc(v->nfa, PLAIN, co, lp, s); |
1326 |
NOERR(); |
1327 |
pa = findarc(v->mccepbegin, PLAIN, co); |
1328 |
assert(pa != NULL); |
1329 |
if (ba == NULL) { /* easy case, need all of them */ |
1330 |
cloneouts(v->nfa, pa->to, s, rp, PLAIN); |
1331 |
newarc(v->nfa, '$', 1, s, rp); |
1332 |
newarc(v->nfa, '$', 0, s, rp); |
1333 |
colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); |
1334 |
} else { /* must be selective */ |
1335 |
if (findarc(ba->to, '$', 1) == NULL) { |
1336 |
newarc(v->nfa, '$', 1, s, rp); |
1337 |
newarc(v->nfa, '$', 0, s, rp); |
1338 |
colorcomplement(v->nfa, v->cm, AHEAD, pa->to, |
1339 |
s, rp); |
1340 |
} |
1341 |
for (pa = pa->to->outs; pa != NULL; pa = pa->outchain) |
1342 |
if (findarc(ba->to, PLAIN, pa->co) == NULL) |
1343 |
newarc(v->nfa, PLAIN, pa->co, s, rp); |
1344 |
if (s->nouts == 0) /* limit of selectivity: none */ |
1345 |
dropstate(v->nfa, s); /* frees arc too */ |
1346 |
} |
1347 |
NOERR(); |
1348 |
} |
1349 |
|
1350 |
delsub(v->nfa, left, right); |
1351 |
assert(left->nouts == 0); |
1352 |
freestate(v->nfa, left); |
1353 |
assert(right->nins == 0); |
1354 |
freestate(v->nfa, right); |
1355 |
} |
1356 |
|
1357 |
/* |
1358 |
- brackpart - handle one item (or range) within a bracket expression |
1359 |
^ static VOID brackpart(struct vars *, struct state *, struct state *); |
1360 |
*/ |
1361 |
static VOID |
1362 |
brackpart(v, lp, rp) |
1363 |
struct vars *v; |
1364 |
struct state *lp; |
1365 |
struct state *rp; |
1366 |
{ |
1367 |
celt startc; |
1368 |
celt endc; |
1369 |
struct cvec *cv; |
1370 |
chr *startp; |
1371 |
chr *endp; |
1372 |
chr c[1]; |
1373 |
|
1374 |
/* parse something, get rid of special cases, take shortcuts */ |
1375 |
switch (v->nexttype) { |
1376 |
case RANGE: /* a-b-c or other botch */ |
1377 |
ERR(REG_ERANGE); |
1378 |
return; |
1379 |
break; |
1380 |
case PLAIN: |
1381 |
c[0] = v->nextvalue; |
1382 |
NEXT(); |
1383 |
/* shortcut for ordinary chr (not range, not MCCE leader) */ |
1384 |
if (!SEE(RANGE) && !ISCELEADER(v, c[0])) { |
1385 |
onechr(v, c[0], lp, rp); |
1386 |
return; |
1387 |
} |
1388 |
startc = element(v, c, c+1); |
1389 |
NOERR(); |
1390 |
break; |
1391 |
case COLLEL: |
1392 |
startp = v->now; |
1393 |
endp = scanplain(v); |
1394 |
INSIST(startp < endp, REG_ECOLLATE); |
1395 |
NOERR(); |
1396 |
startc = element(v, startp, endp); |
1397 |
NOERR(); |
1398 |
break; |
1399 |
case ECLASS: |
1400 |
startp = v->now; |
1401 |
endp = scanplain(v); |
1402 |
INSIST(startp < endp, REG_ECOLLATE); |
1403 |
NOERR(); |
1404 |
startc = element(v, startp, endp); |
1405 |
NOERR(); |
1406 |
cv = eclass(v, startc, (v->cflags®_ICASE)); |
1407 |
NOERR(); |
1408 |
dovec(v, cv, lp, rp); |
1409 |
return; |
1410 |
break; |
1411 |
case CCLASS: |
1412 |
startp = v->now; |
1413 |
endp = scanplain(v); |
1414 |
INSIST(startp < endp, REG_ECTYPE); |
1415 |
NOERR(); |
1416 |
cv = cclass(v, startp, endp, (v->cflags®_ICASE)); |
1417 |
NOERR(); |
1418 |
dovec(v, cv, lp, rp); |
1419 |
return; |
1420 |
break; |
1421 |
default: |
1422 |
ERR(REG_ASSERT); |
1423 |
return; |
1424 |
break; |
1425 |
} |
1426 |
|
1427 |
if (SEE(RANGE)) { |
1428 |
NEXT(); |
1429 |
switch (v->nexttype) { |
1430 |
case PLAIN: |
1431 |
case RANGE: |
1432 |
c[0] = v->nextvalue; |
1433 |
NEXT(); |
1434 |
endc = element(v, c, c+1); |
1435 |
NOERR(); |
1436 |
break; |
1437 |
case COLLEL: |
1438 |
startp = v->now; |
1439 |
endp = scanplain(v); |
1440 |
INSIST(startp < endp, REG_ECOLLATE); |
1441 |
NOERR(); |
1442 |
endc = element(v, startp, endp); |
1443 |
NOERR(); |
1444 |
break; |
1445 |
default: |
1446 |
ERR(REG_ERANGE); |
1447 |
return; |
1448 |
break; |
1449 |
} |
1450 |
} else |
1451 |
endc = startc; |
1452 |
|
1453 |
/* |
1454 |
* Ranges are unportable. Actually, standard C does |
1455 |
* guarantee that digits are contiguous, but making |
1456 |
* that an exception is just too complicated. |
1457 |
*/ |
1458 |
if (startc != endc) |
1459 |
NOTE(REG_UUNPORT); |
1460 |
cv = range(v, startc, endc, (v->cflags®_ICASE)); |
1461 |
NOERR(); |
1462 |
dovec(v, cv, lp, rp); |
1463 |
} |
1464 |
|
1465 |
/* |
1466 |
- scanplain - scan PLAIN contents of [. etc. |
1467 |
* Certain bits of trickery in lex.c know that this code does not try |
1468 |
* to look past the final bracket of the [. etc. |
1469 |
^ static chr *scanplain(struct vars *); |
1470 |
*/ |
1471 |
static chr * /* just after end of sequence */ |
1472 |
scanplain(v) |
1473 |
struct vars *v; |
1474 |
{ |
1475 |
chr *endp; |
1476 |
|
1477 |
assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS)); |
1478 |
NEXT(); |
1479 |
|
1480 |
endp = v->now; |
1481 |
while (SEE(PLAIN)) { |
1482 |
endp = v->now; |
1483 |
NEXT(); |
1484 |
} |
1485 |
|
1486 |
assert(SEE(END) || ISERR()); |
1487 |
NEXT(); |
1488 |
|
1489 |
return endp; |
1490 |
} |
1491 |
|
1492 |
/* |
1493 |
- leaders - process a cvec of collating elements to also include leaders |
1494 |
* Also gives all characters involved their own colors, which is almost |
1495 |
* certainly necessary, and sets up little disconnected subNFA. |
1496 |
^ static VOID leaders(struct vars *, struct cvec *); |
1497 |
*/ |
1498 |
static VOID |
1499 |
leaders(v, cv) |
1500 |
struct vars *v; |
1501 |
struct cvec *cv; |
1502 |
{ |
1503 |
int mcce; |
1504 |
chr *p; |
1505 |
chr leader; |
1506 |
struct state *s; |
1507 |
struct arc *a; |
1508 |
|
1509 |
v->mccepbegin = newstate(v->nfa); |
1510 |
v->mccepend = newstate(v->nfa); |
1511 |
NOERR(); |
1512 |
|
1513 |
for (mcce = 0; mcce < cv->nmcces; mcce++) { |
1514 |
p = cv->mcces[mcce]; |
1515 |
leader = *p; |
1516 |
if (!haschr(cv, leader)) { |
1517 |
addchr(cv, leader); |
1518 |
s = newstate(v->nfa); |
1519 |
newarc(v->nfa, PLAIN, subcolor(v->cm, leader), |
1520 |
v->mccepbegin, s); |
1521 |
okcolors(v->nfa, v->cm); |
1522 |
} else { |
1523 |
a = findarc(v->mccepbegin, PLAIN, |
1524 |
GETCOLOR(v->cm, leader)); |
1525 |
assert(a != NULL); |
1526 |
s = a->to; |
1527 |
assert(s != v->mccepend); |
1528 |
} |
1529 |
p++; |
1530 |
assert(*p != 0 && *(p+1) == 0); /* only 2-char MCCEs for now */ |
1531 |
newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend); |
1532 |
okcolors(v->nfa, v->cm); |
1533 |
} |
1534 |
} |
1535 |
|
1536 |
/* |
1537 |
- onechr - fill in arcs for a plain character, and possible case complements |
1538 |
* This is mostly a shortcut for efficient handling of the common case. |
1539 |
^ static VOID onechr(struct vars *, pchr, struct state *, struct state *); |
1540 |
*/ |
1541 |
static VOID |
1542 |
onechr(v, c, lp, rp) |
1543 |
struct vars *v; |
1544 |
pchr c; |
1545 |
struct state *lp; |
1546 |
struct state *rp; |
1547 |
{ |
1548 |
if (!(v->cflags®_ICASE)) { |
1549 |
newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp); |
1550 |
return; |
1551 |
} |
1552 |
|
1553 |
/* rats, need general case anyway... */ |
1554 |
dovec(v, allcases(v, c), lp, rp); |
1555 |
} |
1556 |
|
1557 |
/* |
1558 |
- dovec - fill in arcs for each element of a cvec |
1559 |
* This one has to handle the messy cases, like MCCEs and MCCE leaders. |
1560 |
^ static VOID dovec(struct vars *, struct cvec *, struct state *, |
1561 |
^ struct state *); |
1562 |
*/ |
1563 |
static VOID |
1564 |
dovec(v, cv, lp, rp) |
1565 |
struct vars *v; |
1566 |
struct cvec *cv; |
1567 |
struct state *lp; |
1568 |
struct state *rp; |
1569 |
{ |
1570 |
chr ch, from, to; |
1571 |
celt ce; |
1572 |
chr *p; |
1573 |
int i; |
1574 |
color co; |
1575 |
struct cvec *leads; |
1576 |
struct arc *a; |
1577 |
struct arc *pa; /* arc in prototype */ |
1578 |
struct state *s; |
1579 |
struct state *ps; /* state in prototype */ |
1580 |
|
1581 |
/* need a place to store leaders, if any */ |
1582 |
if (nmcces(v) > 0) { |
1583 |
assert(v->mcces != NULL); |
1584 |
if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) { |
1585 |
if (v->cv2 != NULL) |
1586 |
free(v->cv2); |
1587 |
v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces); |
1588 |
NOERR(); |
1589 |
leads = v->cv2; |
1590 |
} else |
1591 |
leads = clearcvec(v->cv2); |
1592 |
} else |
1593 |
leads = NULL; |
1594 |
|
1595 |
/* first, get the ordinary characters out of the way */ |
1596 |
for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { |
1597 |
ch = *p; |
1598 |
if (!ISCELEADER(v, ch)) |
1599 |
newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); |
1600 |
else { |
1601 |
assert(singleton(v->cm, ch)); |
1602 |
assert(leads != NULL); |
1603 |
if (!haschr(leads, ch)) |
1604 |
addchr(leads, ch); |
1605 |
} |
1606 |
} |
1607 |
|
1608 |
/* and the ranges */ |
1609 |
for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) { |
1610 |
from = *p; |
1611 |
to = *(p+1); |
1612 |
while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) { |
1613 |
if (from < ce) |
1614 |
subrange(v, from, ce - 1, lp, rp); |
1615 |
assert(singleton(v->cm, ce)); |
1616 |
assert(leads != NULL); |
1617 |
if (!haschr(leads, ce)) |
1618 |
addchr(leads, ce); |
1619 |
from = ce + 1; |
1620 |
} |
1621 |
if (from <= to) |
1622 |
subrange(v, from, to, lp, rp); |
1623 |
} |
1624 |
|
1625 |
if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0) |
1626 |
return; |
1627 |
|
1628 |
/* deal with the MCCE leaders */ |
1629 |
NOTE(REG_ULOCALE); |
1630 |
for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) { |
1631 |
co = GETCOLOR(v->cm, *p); |
1632 |
a = findarc(lp, PLAIN, co); |
1633 |
if (a != NULL) |
1634 |
s = a->to; |
1635 |
else { |
1636 |
s = newstate(v->nfa); |
1637 |
NOERR(); |
1638 |
newarc(v->nfa, PLAIN, co, lp, s); |
1639 |
NOERR(); |
1640 |
} |
1641 |
pa = findarc(v->mccepbegin, PLAIN, co); |
1642 |
assert(pa != NULL); |
1643 |
ps = pa->to; |
1644 |
newarc(v->nfa, '$', 1, s, rp); |
1645 |
newarc(v->nfa, '$', 0, s, rp); |
1646 |
colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp); |
1647 |
NOERR(); |
1648 |
} |
1649 |
|
1650 |
/* and the MCCEs */ |
1651 |
for (i = 0; i < cv->nmcces; i++) { |
1652 |
p = cv->mcces[i]; |
1653 |
assert(singleton(v->cm, *p)); |
1654 |
if (!singleton(v->cm, *p)) { |
1655 |
ERR(REG_ASSERT); |
1656 |
return; |
1657 |
} |
1658 |
ch = *p++; |
1659 |
co = GETCOLOR(v->cm, ch); |
1660 |
a = findarc(lp, PLAIN, co); |
1661 |
if (a != NULL) |
1662 |
s = a->to; |
1663 |
else { |
1664 |
s = newstate(v->nfa); |
1665 |
NOERR(); |
1666 |
newarc(v->nfa, PLAIN, co, lp, s); |
1667 |
NOERR(); |
1668 |
} |
1669 |
assert(*p != 0); /* at least two chars */ |
1670 |
assert(singleton(v->cm, *p)); |
1671 |
ch = *p++; |
1672 |
co = GETCOLOR(v->cm, ch); |
1673 |
assert(*p == 0); /* and only two, for now */ |
1674 |
newarc(v->nfa, PLAIN, co, s, rp); |
1675 |
NOERR(); |
1676 |
} |
1677 |
} |
1678 |
|
1679 |
/* |
1680 |
- nextleader - find next MCCE leader within range |
1681 |
^ static celt nextleader(struct vars *, pchr, pchr); |
1682 |
*/ |
1683 |
static celt /* NOCELT means none */ |
1684 |
nextleader(v, from, to) |
1685 |
struct vars *v; |
1686 |
pchr from; |
1687 |
pchr to; |
1688 |
{ |
1689 |
int i; |
1690 |
chr *p; |
1691 |
chr ch; |
1692 |
celt it = NOCELT; |
1693 |
|
1694 |
if (v->mcces == NULL) |
1695 |
return it; |
1696 |
|
1697 |
for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) { |
1698 |
ch = *p; |
1699 |
if (from <= ch && ch <= to) |
1700 |
if (it == NOCELT || ch < it) |
1701 |
it = ch; |
1702 |
} |
1703 |
return it; |
1704 |
} |
1705 |
|
1706 |
/* |
1707 |
- wordchrs - set up word-chr list for word-boundary stuff, if needed |
1708 |
* The list is kept as a bunch of arcs between two dummy states; it's |
1709 |
* disposed of by the unreachable-states sweep in NFA optimization. |
1710 |
* Does NEXT(). Must not be called from any unusual lexical context. |
1711 |
* This should be reconciled with the \w etc. handling in lex.c, and |
1712 |
* should be cleaned up to reduce dependencies on input scanning. |
1713 |
^ static VOID wordchrs(struct vars *); |
1714 |
*/ |
1715 |
static VOID |
1716 |
wordchrs(v) |
1717 |
struct vars *v; |
1718 |
{ |
1719 |
struct state *left; |
1720 |
struct state *right; |
1721 |
|
1722 |
if (v->wordchrs != NULL) { |
1723 |
NEXT(); /* for consistency */ |
1724 |
return; |
1725 |
} |
1726 |
|
1727 |
left = newstate(v->nfa); |
1728 |
right = newstate(v->nfa); |
1729 |
NOERR(); |
1730 |
/* fine point: implemented with [::], and lexer will set REG_ULOCALE */ |
1731 |
lexword(v); |
1732 |
NEXT(); |
1733 |
assert(v->savenow != NULL && SEE('[')); |
1734 |
bracket(v, left, right); |
1735 |
assert((v->savenow != NULL && SEE(']')) || ISERR()); |
1736 |
NEXT(); |
1737 |
NOERR(); |
1738 |
v->wordchrs = left; |
1739 |
} |
1740 |
|
1741 |
/* |
1742 |
- subre - allocate a subre |
1743 |
^ static struct subre *subre(struct vars *, int, int, struct state *, |
1744 |
^ struct state *); |
1745 |
*/ |
1746 |
static struct subre * |
1747 |
subre(v, op, flags, begin, end) |
1748 |
struct vars *v; |
1749 |
int op; |
1750 |
int flags; |
1751 |
struct state *begin; |
1752 |
struct state *end; |
1753 |
{ |
1754 |
struct subre *ret; |
1755 |
|
1756 |
ret = v->treefree; |
1757 |
if (ret != NULL) |
1758 |
v->treefree = ret->left; |
1759 |
else { |
1760 |
ret = (struct subre *)MALLOC(sizeof(struct subre)); |
1761 |
if (ret == NULL) { |
1762 |
ERR(REG_ESPACE); |
1763 |
return NULL; |
1764 |
} |
1765 |
ret->chain = v->treechain; |
1766 |
v->treechain = ret; |
1767 |
} |
1768 |
|
1769 |
assert(strchr("|.b(=", op) != NULL); |
1770 |
|
1771 |
ret->op = op; |
1772 |
ret->flags = flags; |
1773 |
ret->retry = 0; |
1774 |
ret->subno = 0; |
1775 |
ret->min = ret->max = 1; |
1776 |
ret->left = NULL; |
1777 |
ret->right = NULL; |
1778 |
ret->begin = begin; |
1779 |
ret->end = end; |
1780 |
ZAPCNFA(ret->cnfa); |
1781 |
|
1782 |
return ret; |
1783 |
} |
1784 |
|
1785 |
/* |
1786 |
- freesubre - free a subRE subtree |
1787 |
^ static VOID freesubre(struct vars *, struct subre *); |
1788 |
*/ |
1789 |
static VOID |
1790 |
freesubre(v, sr) |
1791 |
struct vars *v; /* might be NULL */ |
1792 |
struct subre *sr; |
1793 |
{ |
1794 |
if (sr == NULL) |
1795 |
return; |
1796 |
|
1797 |
if (sr->left != NULL) |
1798 |
freesubre(v, sr->left); |
1799 |
if (sr->right != NULL) |
1800 |
freesubre(v, sr->right); |
1801 |
|
1802 |
freesrnode(v, sr); |
1803 |
} |
1804 |
|
1805 |
/* |
1806 |
- freesrnode - free one node in a subRE subtree |
1807 |
^ static VOID freesrnode(struct vars *, struct subre *); |
1808 |
*/ |
1809 |
static VOID |
1810 |
freesrnode(v, sr) |
1811 |
struct vars *v; /* might be NULL */ |
1812 |
struct subre *sr; |
1813 |
{ |
1814 |
if (sr == NULL) |
1815 |
return; |
1816 |
|
1817 |
if (!NULLCNFA(sr->cnfa)) |
1818 |
freecnfa(&sr->cnfa); |
1819 |
sr->flags = 0; |
1820 |
|
1821 |
if (v != NULL) { |
1822 |
sr->left = v->treefree; |
1823 |
v->treefree = sr; |
1824 |
} else |
1825 |
FREE(sr); |
1826 |
} |
1827 |
|
1828 |
/* |
1829 |
- optst - optimize a subRE subtree |
1830 |
^ static VOID optst(struct vars *, struct subre *); |
1831 |
*/ |
1832 |
static VOID |
1833 |
optst(v, t) |
1834 |
struct vars *v; |
1835 |
struct subre *t; |
1836 |
{ |
1837 |
if (t == NULL) |
1838 |
return; |
1839 |
|
1840 |
/* recurse through children */ |
1841 |
if (t->left != NULL) |
1842 |
optst(v, t->left); |
1843 |
if (t->right != NULL) |
1844 |
optst(v, t->right); |
1845 |
} |
1846 |
|
1847 |
/* |
1848 |
- numst - number tree nodes (assigning retry indexes) |
1849 |
^ static int numst(struct subre *, int); |
1850 |
*/ |
1851 |
static int /* next number */ |
1852 |
numst(t, start) |
1853 |
struct subre *t; |
1854 |
int start; /* starting point for subtree numbers */ |
1855 |
{ |
1856 |
int i; |
1857 |
|
1858 |
assert(t != NULL); |
1859 |
|
1860 |
i = start; |
1861 |
t->retry = (short)i++; |
1862 |
if (t->left != NULL) |
1863 |
i = numst(t->left, i); |
1864 |
if (t->right != NULL) |
1865 |
i = numst(t->right, i); |
1866 |
return i; |
1867 |
} |
1868 |
|
1869 |
/* |
1870 |
- markst - mark tree nodes as INUSE |
1871 |
^ static VOID markst(struct subre *); |
1872 |
*/ |
1873 |
static VOID |
1874 |
markst(t) |
1875 |
struct subre *t; |
1876 |
{ |
1877 |
assert(t != NULL); |
1878 |
|
1879 |
t->flags |= INUSE; |
1880 |
if (t->left != NULL) |
1881 |
markst(t->left); |
1882 |
if (t->right != NULL) |
1883 |
markst(t->right); |
1884 |
} |
1885 |
|
1886 |
/* |
1887 |
- cleanst - free any tree nodes not marked INUSE |
1888 |
^ static VOID cleanst(struct vars *); |
1889 |
*/ |
1890 |
static VOID |
1891 |
cleanst(v) |
1892 |
struct vars *v; |
1893 |
{ |
1894 |
struct subre *t; |
1895 |
struct subre *next; |
1896 |
|
1897 |
for (t = v->treechain; t != NULL; t = next) { |
1898 |
next = t->chain; |
1899 |
if (!(t->flags&INUSE)) |
1900 |
FREE(t); |
1901 |
} |
1902 |
v->treechain = NULL; |
1903 |
v->treefree = NULL; /* just on general principles */ |
1904 |
} |
1905 |
|
1906 |
/* |
1907 |
- nfatree - turn a subRE subtree into a tree of compacted NFAs |
1908 |
^ static long nfatree(struct vars *, struct subre *, FILE *); |
1909 |
*/ |
1910 |
static long /* optimize results from top node */ |
1911 |
nfatree(v, t, f) |
1912 |
struct vars *v; |
1913 |
struct subre *t; |
1914 |
FILE *f; /* for debug output */ |
1915 |
{ |
1916 |
assert(t != NULL && t->begin != NULL); |
1917 |
|
1918 |
if (t->left != NULL) |
1919 |
(DISCARD)nfatree(v, t->left, f); |
1920 |
if (t->right != NULL) |
1921 |
(DISCARD)nfatree(v, t->right, f); |
1922 |
|
1923 |
return nfanode(v, t, f); |
1924 |
} |
1925 |
|
1926 |
/* |
1927 |
- nfanode - do one NFA for nfatree |
1928 |
^ static long nfanode(struct vars *, struct subre *, FILE *); |
1929 |
*/ |
1930 |
static long /* optimize results */ |
1931 |
nfanode(v, t, f) |
1932 |
struct vars *v; |
1933 |
struct subre *t; |
1934 |
FILE *f; /* for debug output */ |
1935 |
{ |
1936 |
struct nfa *nfa; |
1937 |
long ret = 0; |
1938 |
char idbuf[50]; |
1939 |
|
1940 |
assert(t->begin != NULL); |
1941 |
|
1942 |
if (f != NULL) |
1943 |
fprintf(f, "\n\n\n========= TREE NODE %s ==========\n", |
1944 |
stid(t, idbuf, sizeof(idbuf))); |
1945 |
nfa = newnfa(v, v->cm, v->nfa); |
1946 |
NOERRZ(); |
1947 |
dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final); |
1948 |
if (!ISERR()) { |
1949 |
specialcolors(nfa); |
1950 |
ret = optimize(nfa, f); |
1951 |
} |
1952 |
if (!ISERR()) |
1953 |
compact(nfa, &t->cnfa); |
1954 |
|
1955 |
freenfa(nfa); |
1956 |
return ret; |
1957 |
} |
1958 |
|
1959 |
/* |
1960 |
- newlacon - allocate a lookahead-constraint subRE |
1961 |
^ static int newlacon(struct vars *, struct state *, struct state *, int); |
1962 |
*/ |
1963 |
static int /* lacon number */ |
1964 |
newlacon(v, begin, end, pos) |
1965 |
struct vars *v; |
1966 |
struct state *begin; |
1967 |
struct state *end; |
1968 |
int pos; |
1969 |
{ |
1970 |
int n; |
1971 |
struct subre *sub; |
1972 |
|
1973 |
if (v->nlacons == 0) { |
1974 |
v->lacons = (struct subre *)MALLOC(2 * sizeof(struct subre)); |
1975 |
n = 1; /* skip 0th */ |
1976 |
v->nlacons = 2; |
1977 |
} else { |
1978 |
v->lacons = (struct subre *)REALLOC(v->lacons, |
1979 |
(v->nlacons+1)*sizeof(struct subre)); |
1980 |
n = v->nlacons++; |
1981 |
} |
1982 |
if (v->lacons == NULL) { |
1983 |
ERR(REG_ESPACE); |
1984 |
return 0; |
1985 |
} |
1986 |
sub = &v->lacons[n]; |
1987 |
sub->begin = begin; |
1988 |
sub->end = end; |
1989 |
sub->subno = pos; |
1990 |
ZAPCNFA(sub->cnfa); |
1991 |
return n; |
1992 |
} |
1993 |
|
1994 |
/* |
1995 |
- freelacons - free lookahead-constraint subRE vector |
1996 |
^ static VOID freelacons(struct subre *, int); |
1997 |
*/ |
1998 |
static VOID |
1999 |
freelacons(subs, n) |
2000 |
struct subre *subs; |
2001 |
int n; |
2002 |
{ |
2003 |
struct subre *sub; |
2004 |
int i; |
2005 |
|
2006 |
assert(n > 0); |
2007 |
for (sub = subs + 1, i = n - 1; i > 0; sub++, i--) /* no 0th */ |
2008 |
if (!NULLCNFA(sub->cnfa)) |
2009 |
freecnfa(&sub->cnfa); |
2010 |
FREE(subs); |
2011 |
} |
2012 |
|
2013 |
/* |
2014 |
- rfree - free a whole RE (insides of regfree) |
2015 |
^ static VOID rfree(regex_t *); |
2016 |
*/ |
2017 |
static VOID |
2018 |
rfree(re) |
2019 |
regex_t *re; |
2020 |
{ |
2021 |
struct guts *g; |
2022 |
|
2023 |
if (re == NULL || re->re_magic != REMAGIC) |
2024 |
return; |
2025 |
|
2026 |
re->re_magic = 0; /* invalidate RE */ |
2027 |
g = (struct guts *)re->re_guts; |
2028 |
re->re_guts = NULL; |
2029 |
re->re_fns = NULL; |
2030 |
g->magic = 0; |
2031 |
freecm(&g->cmap); |
2032 |
if (g->tree != NULL) |
2033 |
freesubre((struct vars *)NULL, g->tree); |
2034 |
if (g->lacons != NULL) |
2035 |
freelacons(g->lacons, g->nlacons); |
2036 |
if (!NULLCNFA(g->search)) |
2037 |
freecnfa(&g->search); |
2038 |
FREE(g); |
2039 |
} |
2040 |
|
2041 |
/* |
2042 |
- dump - dump an RE in human-readable form |
2043 |
^ static VOID dump(regex_t *, FILE *); |
2044 |
*/ |
2045 |
static VOID |
2046 |
dump(re, f) |
2047 |
regex_t *re; |
2048 |
FILE *f; |
2049 |
{ |
2050 |
#ifdef REG_DEBUG |
2051 |
struct guts *g; |
2052 |
int i; |
2053 |
|
2054 |
if (re->re_magic != REMAGIC) |
2055 |
fprintf(f, "bad magic number (0x%x not 0x%x)\n", re->re_magic, |
2056 |
REMAGIC); |
2057 |
if (re->re_guts == NULL) { |
2058 |
fprintf(f, "NULL guts!!!\n"); |
2059 |
return; |
2060 |
} |
2061 |
g = (struct guts *)re->re_guts; |
2062 |
if (g->magic != GUTSMAGIC) |
2063 |
fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", g->magic, |
2064 |
GUTSMAGIC); |
2065 |
|
2066 |
fprintf(f, "\n\n\n========= DUMP ==========\n"); |
2067 |
fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n", |
2068 |
re->re_nsub, re->re_info, re->re_csize, g->ntree); |
2069 |
|
2070 |
dumpcolors(&g->cmap, f); |
2071 |
if (!NULLCNFA(g->search)) { |
2072 |
printf("\nsearch:\n"); |
2073 |
dumpcnfa(&g->search, f); |
2074 |
} |
2075 |
for (i = 1; i < g->nlacons; i++) { |
2076 |
fprintf(f, "\nla%d (%s):\n", i, |
2077 |
(g->lacons[i].subno) ? "positive" : "negative"); |
2078 |
dumpcnfa(&g->lacons[i].cnfa, f); |
2079 |
} |
2080 |
fprintf(f, "\n"); |
2081 |
dumpst(g->tree, f, 0); |
2082 |
#endif |
2083 |
} |
2084 |
|
2085 |
/* |
2086 |
- dumpst - dump a subRE tree |
2087 |
^ static VOID dumpst(struct subre *, FILE *, int); |
2088 |
*/ |
2089 |
static VOID |
2090 |
dumpst(t, f, nfapresent) |
2091 |
struct subre *t; |
2092 |
FILE *f; |
2093 |
int nfapresent; /* is the original NFA still around? */ |
2094 |
{ |
2095 |
if (t == NULL) |
2096 |
fprintf(f, "null tree\n"); |
2097 |
else |
2098 |
stdump(t, f, nfapresent); |
2099 |
fflush(f); |
2100 |
} |
2101 |
|
2102 |
/* |
2103 |
- stdump - recursive guts of dumpst |
2104 |
^ static VOID stdump(struct subre *, FILE *, int); |
2105 |
*/ |
2106 |
static VOID |
2107 |
stdump(t, f, nfapresent) |
2108 |
struct subre *t; |
2109 |
FILE *f; |
2110 |
int nfapresent; /* is the original NFA still around? */ |
2111 |
{ |
2112 |
char idbuf[50]; |
2113 |
|
2114 |
fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op); |
2115 |
if (t->flags&LONGER) |
2116 |
fprintf(f, " longest"); |
2117 |
if (t->flags&SHORTER) |
2118 |
fprintf(f, " shortest"); |
2119 |
if (t->flags&MIXED) |
2120 |
fprintf(f, " hasmixed"); |
2121 |
if (t->flags&CAP) |
2122 |
fprintf(f, " hascapture"); |
2123 |
if (t->flags&BACKR) |
2124 |
fprintf(f, " hasbackref"); |
2125 |
if (!(t->flags&INUSE)) |
2126 |
fprintf(f, " UNUSED"); |
2127 |
if (t->subno != 0) |
2128 |
fprintf(f, " (#%d)", t->subno); |
2129 |
if (t->min != 1 || t->max != 1) { |
2130 |
fprintf(f, " {%d,", t->min); |
2131 |
if (t->max != INFINITY) |
2132 |
fprintf(f, "%d", t->max); |
2133 |
fprintf(f, "}"); |
2134 |
} |
2135 |
if (nfapresent) |
2136 |
fprintf(f, " %ld-%ld", (long)t->begin->no, (long)t->end->no); |
2137 |
if (t->left != NULL) |
2138 |
fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf))); |
2139 |
if (t->right != NULL) |
2140 |
fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf))); |
2141 |
if (!NULLCNFA(t->cnfa)) { |
2142 |
fprintf(f, "\n"); |
2143 |
dumpcnfa(&t->cnfa, f); |
2144 |
fprintf(f, "\n"); |
2145 |
} |
2146 |
if (t->left != NULL) |
2147 |
stdump(t->left, f, nfapresent); |
2148 |
if (t->right != NULL) |
2149 |
stdump(t->right, f, nfapresent); |
2150 |
} |
2151 |
|
2152 |
/* |
2153 |
- stid - identify a subtree node for dumping |
2154 |
^ static char *stid(struct subre *, char *, size_t); |
2155 |
*/ |
2156 |
static char * /* points to buf or constant string */ |
2157 |
stid(t, buf, bufsize) |
2158 |
struct subre *t; |
2159 |
char *buf; |
2160 |
size_t bufsize; |
2161 |
{ |
2162 |
/* big enough for hex int or decimal t->retry? */ |
2163 |
if (bufsize < sizeof(int)*2 + 3 || bufsize < sizeof(t->retry)*3 + 1) |
2164 |
return "unable"; |
2165 |
if (t->retry != 0) |
2166 |
sprintf(buf, "%d", t->retry); |
2167 |
else |
2168 |
sprintf(buf, "0x%x", (int)t); /* may lose bits, that's okay */ |
2169 |
return buf; |
2170 |
} |
2171 |
|
2172 |
#include "regc_lex.c" |
2173 |
#include "regc_color.c" |
2174 |
#include "regc_nfa.c" |
2175 |
#include "regc_cvec.c" |
2176 |
#include "regc_locale.c" |
2177 |
|
2178 |
/* End of regcomp.c */ |