/[dtapublic]/projs/trunk/shared_source/c_tcl_base_7_5_w_mods/regcomp.c
ViewVC logotype

Annotation of /projs/trunk/shared_source/c_tcl_base_7_5_w_mods/regcomp.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 71 - (hide annotations) (download)
Sat Nov 5 11:07:06 2016 UTC (7 years, 6 months ago) by dashley
File MIME type: text/plain
File size: 59531 byte(s)
Set EOL properties appropriately to facilitate simultaneous Linux and Windows development.
1 dashley 71 /* $Header$ */
2     /*
3     * re_*comp and friends - compile REs
4     * This file #includes several others (see the bottom).
5     *
6     * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
7     *
8     * Development of this software was funded, in part, by Cray Research Inc.,
9     * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
10     * Corporation, none of whom are responsible for the results. The author
11     * thanks all of them.
12     *
13     * Redistribution and use in source and binary forms -- with or without
14     * modification -- are permitted for any purpose, provided that
15     * redistributions in source form retain this entire copyright notice and
16     * indicate the origin and nature of any modifications.
17     *
18     * I'd appreciate being given credit for this package in the documentation
19     * of software which uses it, but that is not a requirement.
20     *
21     * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
22     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
23     * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
24     * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25     * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26     * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
27     * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28     * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29     * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
30     * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31     *
32     */
33    
34     #include "regguts.h"
35    
36     /*
37     * forward declarations, up here so forward datatypes etc. are defined early
38     */
39     /* =====^!^===== begin forwards =====^!^===== */
40     /* automatically gathered by fwd; do not hand-edit */
41     /* === regcomp.c === */
42     int compile _ANSI_ARGS_((regex_t *, CONST chr *, size_t, int));
43     static VOID moresubs _ANSI_ARGS_((struct vars *, int));
44     static int freev _ANSI_ARGS_((struct vars *, int));
45     static VOID makesearch _ANSI_ARGS_((struct vars *, struct nfa *));
46     static struct subre *parse _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *));
47     static struct subre *parsebranch _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, int));
48     static VOID parseqatom _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, struct subre *));
49     static VOID nonword _ANSI_ARGS_((struct vars *, int, struct state *, struct state *));
50     static VOID word _ANSI_ARGS_((struct vars *, int, struct state *, struct state *));
51     static int scannum _ANSI_ARGS_((struct vars *));
52     static VOID repeat _ANSI_ARGS_((struct vars *, struct state *, struct state *, int, int));
53     static VOID bracket _ANSI_ARGS_((struct vars *, struct state *, struct state *));
54     static VOID cbracket _ANSI_ARGS_((struct vars *, struct state *, struct state *));
55     static VOID brackpart _ANSI_ARGS_((struct vars *, struct state *, struct state *));
56     static chr *scanplain _ANSI_ARGS_((struct vars *));
57     static VOID leaders _ANSI_ARGS_((struct vars *, struct cvec *));
58     static VOID onechr _ANSI_ARGS_((struct vars *, pchr, struct state *, struct state *));
59     static VOID dovec _ANSI_ARGS_((struct vars *, struct cvec *, struct state *, struct state *));
60     static celt nextleader _ANSI_ARGS_((struct vars *, pchr, pchr));
61     static VOID wordchrs _ANSI_ARGS_((struct vars *));
62     static struct subre *subre _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *));
63     static VOID freesubre _ANSI_ARGS_((struct vars *, struct subre *));
64     static VOID freesrnode _ANSI_ARGS_((struct vars *, struct subre *));
65     static VOID optst _ANSI_ARGS_((struct vars *, struct subre *));
66     static int numst _ANSI_ARGS_((struct subre *, int));
67     static VOID markst _ANSI_ARGS_((struct subre *));
68     static VOID cleanst _ANSI_ARGS_((struct vars *));
69     static long nfatree _ANSI_ARGS_((struct vars *, struct subre *, FILE *));
70     static long nfanode _ANSI_ARGS_((struct vars *, struct subre *, FILE *));
71     static int newlacon _ANSI_ARGS_((struct vars *, struct state *, struct state *, int));
72     static VOID freelacons _ANSI_ARGS_((struct subre *, int));
73     static VOID rfree _ANSI_ARGS_((regex_t *));
74     static VOID dump _ANSI_ARGS_((regex_t *, FILE *));
75     static VOID dumpst _ANSI_ARGS_((struct subre *, FILE *, int));
76     static VOID stdump _ANSI_ARGS_((struct subre *, FILE *, int));
77     static char *stid _ANSI_ARGS_((struct subre *, char *, size_t));
78     /* === regc_lex.c === */
79     static VOID lexstart _ANSI_ARGS_((struct vars *));
80     static VOID prefixes _ANSI_ARGS_((struct vars *));
81     static VOID lexnest _ANSI_ARGS_((struct vars *, chr *, chr *));
82     static VOID lexword _ANSI_ARGS_((struct vars *));
83     static int next _ANSI_ARGS_((struct vars *));
84     static int lexescape _ANSI_ARGS_((struct vars *));
85     static chr lexdigits _ANSI_ARGS_((struct vars *, int, int, int));
86     static int brenext _ANSI_ARGS_((struct vars *, pchr));
87     static VOID skip _ANSI_ARGS_((struct vars *));
88     static chr newline _ANSI_ARGS_((NOPARMS));
89     #ifdef REG_DEBUG
90     static chr *ch _ANSI_ARGS_((NOPARMS));
91     #endif
92     static chr chrnamed _ANSI_ARGS_((struct vars *, chr *, chr *, pchr));
93     /* === regc_color.c === */
94     static VOID initcm _ANSI_ARGS_((struct vars *, struct colormap *));
95     static VOID freecm _ANSI_ARGS_((struct colormap *));
96     static VOID cmtreefree _ANSI_ARGS_((struct colormap *, union tree *, int));
97     static color setcolor _ANSI_ARGS_((struct colormap *, pchr, pcolor));
98     static color maxcolor _ANSI_ARGS_((struct colormap *));
99     static color newcolor _ANSI_ARGS_((struct colormap *));
100     static VOID freecolor _ANSI_ARGS_((struct colormap *, pcolor));
101     static color pseudocolor _ANSI_ARGS_((struct colormap *));
102     static color subcolor _ANSI_ARGS_((struct colormap *, pchr c));
103     static color newsub _ANSI_ARGS_((struct colormap *, pcolor));
104     static VOID subrange _ANSI_ARGS_((struct vars *, pchr, pchr, struct state *, struct state *));
105     static VOID subblock _ANSI_ARGS_((struct vars *, pchr, struct state *, struct state *));
106     static VOID okcolors _ANSI_ARGS_((struct nfa *, struct colormap *));
107     static VOID colorchain _ANSI_ARGS_((struct colormap *, struct arc *));
108     static VOID uncolorchain _ANSI_ARGS_((struct colormap *, struct arc *));
109     static int singleton _ANSI_ARGS_((struct colormap *, pchr c));
110     static VOID rainbow _ANSI_ARGS_((struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *));
111     static VOID colorcomplement _ANSI_ARGS_((struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *));
112     #ifdef REG_DEBUG
113     static VOID dumpcolors _ANSI_ARGS_((struct colormap *, FILE *));
114     static VOID fillcheck _ANSI_ARGS_((struct colormap *, union tree *, int, FILE *));
115     static VOID dumpchr _ANSI_ARGS_((pchr, FILE *));
116     #endif
117     /* === regc_nfa.c === */
118     static struct nfa *newnfa _ANSI_ARGS_((struct vars *, struct colormap *, struct nfa *));
119     static VOID freenfa _ANSI_ARGS_((struct nfa *));
120     static struct state *newstate _ANSI_ARGS_((struct nfa *));
121     static struct state *newfstate _ANSI_ARGS_((struct nfa *, int flag));
122     static VOID dropstate _ANSI_ARGS_((struct nfa *, struct state *));
123     static VOID freestate _ANSI_ARGS_((struct nfa *, struct state *));
124     static VOID destroystate _ANSI_ARGS_((struct nfa *, struct state *));
125     static VOID newarc _ANSI_ARGS_((struct nfa *, int, pcolor, struct state *, struct state *));
126     static struct arc *allocarc _ANSI_ARGS_((struct nfa *, struct state *));
127     static VOID freearc _ANSI_ARGS_((struct nfa *, struct arc *));
128     static struct arc *findarc _ANSI_ARGS_((struct state *, int, pcolor));
129     static VOID cparc _ANSI_ARGS_((struct nfa *, struct arc *, struct state *, struct state *));
130     static VOID moveins _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
131     static VOID copyins _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
132     static VOID moveouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
133     static VOID copyouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
134     static VOID cloneouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, int));
135     static VOID delsub _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
136     static VOID deltraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
137     static VOID dupnfa _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, struct state *));
138     static VOID duptraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *));
139     static VOID cleartraverse _ANSI_ARGS_((struct nfa *, struct state *));
140     static VOID specialcolors _ANSI_ARGS_((struct nfa *));
141     static long optimize _ANSI_ARGS_((struct nfa *, FILE *));
142     static VOID pullback _ANSI_ARGS_((struct nfa *, FILE *));
143     static int pull _ANSI_ARGS_((struct nfa *, struct arc *));
144     static VOID pushfwd _ANSI_ARGS_((struct nfa *, FILE *));
145     static int push _ANSI_ARGS_((struct nfa *, struct arc *));
146     #define INCOMPATIBLE 1 /* destroys arc */
147     #define SATISFIED 2 /* constraint satisfied */
148     #define COMPATIBLE 3 /* compatible but not satisfied yet */
149     static int combine _ANSI_ARGS_((struct arc *, struct arc *));
150     static VOID fixempties _ANSI_ARGS_((struct nfa *, FILE *));
151     static int unempty _ANSI_ARGS_((struct nfa *, struct arc *));
152     static VOID cleanup _ANSI_ARGS_((struct nfa *));
153     static VOID markreachable _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *));
154     static VOID markcanreach _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *));
155     static long analyze _ANSI_ARGS_((struct nfa *));
156     static VOID compact _ANSI_ARGS_((struct nfa *, struct cnfa *));
157     static VOID carcsort _ANSI_ARGS_((struct carc *, struct carc *));
158     static VOID freecnfa _ANSI_ARGS_((struct cnfa *));
159     static VOID dumpnfa _ANSI_ARGS_((struct nfa *, FILE *));
160     #ifdef REG_DEBUG
161     static VOID dumpstate _ANSI_ARGS_((struct state *, FILE *));
162     static VOID dumparcs _ANSI_ARGS_((struct state *, FILE *));
163     static int dumprarcs _ANSI_ARGS_((struct arc *, struct state *, FILE *, int));
164     static VOID dumparc _ANSI_ARGS_((struct arc *, struct state *, FILE *));
165     #endif
166     static VOID dumpcnfa _ANSI_ARGS_((struct cnfa *, FILE *));
167     #ifdef REG_DEBUG
168     static VOID dumpcstate _ANSI_ARGS_((int, struct carc *, struct cnfa *, FILE *));
169     #endif
170     /* === regc_cvec.c === */
171     static struct cvec *newcvec _ANSI_ARGS_((int, int, int));
172     static struct cvec *clearcvec _ANSI_ARGS_((struct cvec *));
173     static VOID addchr _ANSI_ARGS_((struct cvec *, pchr));
174     static VOID addrange _ANSI_ARGS_((struct cvec *, pchr, pchr));
175     static VOID addmcce _ANSI_ARGS_((struct cvec *, chr *, chr *));
176     static int haschr _ANSI_ARGS_((struct cvec *, pchr));
177     static struct cvec *getcvec _ANSI_ARGS_((struct vars *, int, int, int));
178     static VOID freecvec _ANSI_ARGS_((struct cvec *));
179     /* === regc_locale.c === */
180     static int nmcces _ANSI_ARGS_((struct vars *));
181     static int nleaders _ANSI_ARGS_((struct vars *));
182     static struct cvec *allmcces _ANSI_ARGS_((struct vars *, struct cvec *));
183     static celt element _ANSI_ARGS_((struct vars *, chr *, chr *));
184     static struct cvec *range _ANSI_ARGS_((struct vars *, celt, celt, int));
185     static int before _ANSI_ARGS_((celt, celt));
186     static struct cvec *eclass _ANSI_ARGS_((struct vars *, celt, int));
187     static struct cvec *cclass _ANSI_ARGS_((struct vars *, chr *, chr *, int));
188     static struct cvec *allcases _ANSI_ARGS_((struct vars *, pchr));
189     static int cmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t));
190     static int casecmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t));
191     /* automatically gathered by fwd; do not hand-edit */
192     /* =====^!^===== end forwards =====^!^===== */
193    
194    
195    
196     /* internal variables, bundled for easy passing around */
197     struct vars {
198     regex_t *re;
199     chr *now; /* scan pointer into string */
200     chr *stop; /* end of string */
201     chr *savenow; /* saved now and stop for "subroutine call" */
202     chr *savestop;
203     int err; /* error code (0 if none) */
204     int cflags; /* copy of compile flags */
205     int lasttype; /* type of previous token */
206     int nexttype; /* type of next token */
207     chr nextvalue; /* value (if any) of next token */
208     int lexcon; /* lexical context type (see lex.c) */
209     int nsubexp; /* subexpression count */
210     struct subre **subs; /* subRE pointer vector */
211     size_t nsubs; /* length of vector */
212     struct subre *sub10[10]; /* initial vector, enough for most */
213     struct nfa *nfa; /* the NFA */
214     struct colormap *cm; /* character color map */
215     color nlcolor; /* color of newline */
216     struct state *wordchrs; /* state in nfa holding word-char outarcs */
217     struct subre *tree; /* subexpression tree */
218     struct subre *treechain; /* all tree nodes allocated */
219     struct subre *treefree; /* any free tree nodes */
220     int ntree; /* number of tree nodes */
221     struct cvec *cv; /* interface cvec */
222     struct cvec *cv2; /* utility cvec */
223     struct cvec *mcces; /* collating-element information */
224     # define ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c)))
225     struct state *mccepbegin; /* in nfa, start of MCCE prototypes */
226     struct state *mccepend; /* in nfa, end of MCCE prototypes */
227     struct subre *lacons; /* lookahead-constraint vector */
228     int nlacons; /* size of lacons */
229     };
230    
231     /* parsing macros; most know that `v' is the struct vars pointer */
232     #define NEXT() (next(v)) /* advance by one token */
233     #define SEE(t) (v->nexttype == (t)) /* is next token this? */
234     #define EAT(t) (SEE(t) && next(v)) /* if next is this, swallow it */
235     #define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
236     #define ISERR() VISERR(v)
237     #define VERR(vv,e) ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err :\
238     ((vv)->err = (e)))
239     #define ERR(e) VERR(v, e) /* record an error */
240     #define NOERR() {if (ISERR()) return;} /* if error seen, return */
241     #define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */
242     #define NOERRZ() {if (ISERR()) return 0;} /* NOERR with retval */
243     #define INSIST(c, e) ((c) ? 0 : ERR(e)) /* if condition false, error */
244     #define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */
245     #define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y)
246    
247     /* token type codes, some also used as NFA arc types */
248     #define EMPTY 'n' /* no token present */
249     #define EOS 'e' /* end of string */
250     #define PLAIN 'p' /* ordinary character */
251     #define DIGIT 'd' /* digit (in bound) */
252     #define BACKREF 'b' /* back reference */
253     #define COLLEL 'I' /* start of [. */
254     #define ECLASS 'E' /* start of [= */
255     #define CCLASS 'C' /* start of [: */
256     #define END 'X' /* end of [. [= [: */
257     #define RANGE 'R' /* - within [] which might be range delim. */
258     #define LACON 'L' /* lookahead constraint subRE */
259     #define AHEAD 'a' /* color-lookahead arc */
260     #define BEHIND 'r' /* color-lookbehind arc */
261     #define WBDRY 'w' /* word boundary constraint */
262     #define NWBDRY 'W' /* non-word-boundary constraint */
263     #define SBEGIN 'A' /* beginning of string (even if not BOL) */
264     #define SEND 'Z' /* end of string (even if not EOL) */
265     #define PREFER 'P' /* length preference */
266    
267     /* is an arc colored, and hence on a color chain? */
268     #define COLORED(a) ((a)->type == PLAIN || (a)->type == AHEAD || \
269     (a)->type == BEHIND)
270    
271    
272    
273     /* static function list */
274     static struct fns functions = {
275     rfree, /* regfree insides */
276     };
277    
278    
279    
280     /*
281     - compile - compile regular expression
282     ^ int compile(regex_t *, CONST chr *, size_t, int);
283     */
284     int
285     compile(re, string, len, flags)
286     regex_t *re;
287     CONST chr *string;
288     size_t len;
289     int flags;
290     {
291     struct vars var;
292     struct vars *v = &var;
293     struct guts *g;
294     int i;
295     size_t j;
296     FILE *debug = (flags&REG_PROGRESS) ? stdout : (FILE *)NULL;
297     # define CNOERR() { if (ISERR()) return freev(v, v->err); }
298    
299     /* sanity checks */
300    
301     if (re == NULL || string == NULL)
302     return REG_INVARG;
303     if ((flags&REG_QUOTE) &&
304     (flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)))
305     return REG_INVARG;
306     if (!(flags&REG_EXTENDED) && (flags&REG_ADVF))
307     return REG_INVARG;
308    
309     /* initial setup (after which freev() is callable) */
310     v->re = re;
311     v->now = (chr *)string;
312     v->stop = v->now + len;
313     v->savenow = v->savestop = NULL;
314     v->err = 0;
315     v->cflags = flags;
316     v->nsubexp = 0;
317     v->subs = v->sub10;
318     v->nsubs = 10;
319     for (j = 0; j < v->nsubs; j++)
320     v->subs[j] = NULL;
321     v->nfa = NULL;
322     v->cm = NULL;
323     v->nlcolor = COLORLESS;
324     v->wordchrs = NULL;
325     v->tree = NULL;
326     v->treechain = NULL;
327     v->treefree = NULL;
328     v->cv = NULL;
329     v->cv2 = NULL;
330     v->mcces = NULL;
331     v->lacons = NULL;
332     v->nlacons = 0;
333     re->re_magic = REMAGIC;
334     re->re_info = 0; /* bits get set during parse */
335     re->re_csize = sizeof(chr);
336     re->re_guts = NULL;
337     re->re_fns = VS(&functions);
338    
339     /* more complex setup, malloced things */
340     re->re_guts = VS(MALLOC(sizeof(struct guts)));
341     if (re->re_guts == NULL)
342     return freev(v, REG_ESPACE);
343     g = (struct guts *)re->re_guts;
344     g->tree = NULL;
345     initcm(v, &g->cmap);
346     v->cm = &g->cmap;
347     g->lacons = NULL;
348     g->nlacons = 0;
349     ZAPCNFA(g->search);
350     v->nfa = newnfa(v, v->cm, (struct nfa *)NULL);
351     CNOERR();
352     v->cv = newcvec(100, 20, 10);
353     if (v->cv == NULL)
354     return freev(v, REG_ESPACE);
355     i = nmcces(v);
356     if (i > 0) {
357     v->mcces = newcvec(nleaders(v), 0, i);
358     CNOERR();
359     v->mcces = allmcces(v, v->mcces);
360     leaders(v, v->mcces);
361     addmcce(v->mcces, (chr *)NULL, (chr *)NULL); /* dummy */
362     }
363     CNOERR();
364    
365     /* parsing */
366     lexstart(v); /* also handles prefixes */
367     if ((v->cflags&REG_NLSTOP) || (v->cflags&REG_NLANCH)) {
368     /* assign newline a unique color */
369     v->nlcolor = subcolor(v->cm, newline());
370     okcolors(v->nfa, v->cm);
371     }
372     CNOERR();
373     v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final);
374     assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */
375     CNOERR();
376     assert(v->tree != NULL);
377    
378     /* finish setup of nfa and its subre tree */
379     specialcolors(v->nfa);
380     CNOERR();
381     if (debug != NULL) {
382     fprintf(debug, "\n\n\n========= RAW ==========\n");
383     dumpnfa(v->nfa, debug);
384     dumpst(v->tree, debug, 1);
385     }
386     optst(v, v->tree);
387     v->ntree = numst(v->tree, 1);
388     markst(v->tree);
389     cleanst(v);
390     if (debug != NULL) {
391     fprintf(debug, "\n\n\n========= TREE FIXED ==========\n");
392     dumpst(v->tree, debug, 1);
393     }
394    
395     /* build compacted NFAs for tree and lacons */
396     re->re_info |= nfatree(v, v->tree, debug);
397     CNOERR();
398     assert(v->nlacons == 0 || v->lacons != NULL);
399     for (i = 1; i < v->nlacons; i++) {
400     if (debug != NULL)
401     fprintf(debug, "\n\n\n========= LA%d ==========\n", i);
402     nfanode(v, &v->lacons[i], debug);
403     }
404     CNOERR();
405     if (v->tree->flags&SHORTER)
406     NOTE(REG_USHORTEST);
407    
408     /* build compacted NFAs for tree, lacons, fast search */
409     if (debug != NULL)
410     fprintf(debug, "\n\n\n========= SEARCH ==========\n");
411     /* can sacrifice main NFA now, so use it as work area */
412     (DISCARD)optimize(v->nfa, debug);
413     CNOERR();
414     makesearch(v, v->nfa);
415     CNOERR();
416     compact(v->nfa, &g->search);
417     CNOERR();
418    
419     /* looks okay, package it up */
420     re->re_nsub = v->nsubexp;
421     v->re = NULL; /* freev no longer frees re */
422     g->magic = GUTSMAGIC;
423     g->cflags = v->cflags;
424     g->info = re->re_info;
425     g->nsub = re->re_nsub;
426     g->tree = v->tree;
427     v->tree = NULL;
428     g->ntree = v->ntree;
429     g->compare = (v->cflags&REG_ICASE) ? casecmp : cmp;
430     g->lacons = v->lacons;
431     v->lacons = NULL;
432     g->nlacons = v->nlacons;
433    
434     if (flags&REG_DUMP)
435     dump(re, stdout);
436    
437     assert(v->err == 0);
438     return freev(v, 0);
439     }
440    
441     /*
442     - moresubs - enlarge subRE vector
443     ^ static VOID moresubs(struct vars *, int);
444     */
445     static VOID
446     moresubs(v, wanted)
447     struct vars *v;
448     int wanted; /* want enough room for this one */
449     {
450     struct subre **p;
451     size_t n;
452    
453     assert(wanted > 0 && (size_t)wanted >= v->nsubs);
454     n = (size_t)wanted * 3 / 2 + 1;
455     if (v->subs == v->sub10) {
456     p = (struct subre **)MALLOC(n * sizeof(struct subre *));
457     if (p != NULL)
458     memcpy(VS(p), VS(v->subs),
459     v->nsubs * sizeof(struct subre *));
460     } else
461     p = (struct subre **)REALLOC(v->subs, n*sizeof(struct subre *));
462     if (p == NULL) {
463     ERR(REG_ESPACE);
464     return;
465     }
466     v->subs = p;
467     for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++)
468     *p = NULL;
469     assert(v->nsubs == n);
470     assert((size_t)wanted < v->nsubs);
471     }
472    
473     /*
474     - freev - free vars struct's substructures where necessary
475     * Optionally does error-number setting, and always returns error code
476     * (if any), to make error-handling code terser.
477     ^ static int freev(struct vars *, int);
478     */
479     static int
480     freev(v, err)
481     struct vars *v;
482     int err;
483     {
484     if (v->re != NULL)
485     rfree(v->re);
486     if (v->subs != v->sub10)
487     FREE(v->subs);
488     if (v->nfa != NULL)
489     freenfa(v->nfa);
490     if (v->tree != NULL)
491     freesubre(v, v->tree);
492     if (v->treechain != NULL)
493     cleanst(v);
494     if (v->cv != NULL)
495     freecvec(v->cv);
496     if (v->cv2 != NULL)
497     freecvec(v->cv2);
498     if (v->mcces != NULL)
499     freecvec(v->mcces);
500     if (v->lacons != NULL)
501     freelacons(v->lacons, v->nlacons);
502     ERR(err); /* nop if err==0 */
503    
504     return v->err;
505     }
506    
507     /*
508     - makesearch - turn an NFA into a search NFA (implicit prepend of .*?)
509     * NFA must have been optimize()d already.
510     ^ static VOID makesearch(struct vars *, struct nfa *);
511     */
512     static VOID
513     makesearch(v, nfa)
514     struct vars *v;
515     struct nfa *nfa;
516     {
517     struct arc *a;
518     struct arc *b;
519     struct state *pre = nfa->pre;
520     struct state *s;
521     struct state *s2;
522     struct state *slist;
523    
524     /* no loops are needed if it's anchored */
525     for (a = pre->outs; a != NULL; a = a->outchain) {
526     assert(a->type == PLAIN);
527     if (a->co != nfa->bos[0] && a->co != nfa->bos[1])
528     break;
529     }
530     if (a != NULL) {
531     /* add implicit .* in front */
532     rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre);
533    
534     /* and ^* and \A* too -- not always necessary, but harmless */
535     newarc(nfa, PLAIN, nfa->bos[0], pre, pre);
536     newarc(nfa, PLAIN, nfa->bos[1], pre, pre);
537     }
538    
539     /*
540     * Now here's the subtle part. Because many REs have no lookback
541     * constraints, often knowing when you were in the pre state tells
542     * you little; it's the next state(s) that are informative. But
543     * some of them may have other inarcs, i.e. it may be possible to
544     * make actual progress and then return to one of them. We must
545     * de-optimize such cases, splitting each such state into progress
546     * and no-progress states.
547     */
548    
549     /* first, make a list of the states */
550     slist = NULL;
551     for (a = pre->outs; a != NULL; a = a->outchain) {
552     s = a->to;
553     for (b = s->ins; b != NULL; b = b->inchain)
554     if (b->from != pre)
555     break;
556     if (b != NULL) { /* must be split */
557     s->tmp = slist;
558     slist = s;
559     }
560     }
561    
562     /* do the splits */
563     for (s = slist; s != NULL; s = s2) {
564     s2 = newstate(nfa);
565     copyouts(nfa, s, s2);
566     for (a = s->ins; a != NULL; a = b) {
567     b = a->inchain;
568     if (a->from != pre) {
569     cparc(nfa, a, a->from, s2);
570     freearc(nfa, a);
571     }
572     }
573     s2 = s->tmp;
574     s->tmp = NULL; /* clean up while we're at it */
575     }
576     }
577    
578     /*
579     - parse - parse an RE
580     * This is actually just the top level, which parses a bunch of branches
581     * tied together with '|'. They appear in the tree as the left children
582     * of a chain of '|' subres.
583     ^ static struct subre *parse(struct vars *, int, int, struct state *,
584     ^ struct state *);
585     */
586     static struct subre *
587     parse(v, stopper, type, init, final)
588     struct vars *v;
589     int stopper; /* EOS or ')' */
590     int type; /* LACON (lookahead subRE) or PLAIN */
591     struct state *init; /* initial state */
592     struct state *final; /* final state */
593     {
594     struct state *left; /* scaffolding for branch */
595     struct state *right;
596     struct subre *branches; /* top level */
597     struct subre *branch; /* current branch */
598     struct subre *t; /* temporary */
599     int firstbranch; /* is this the first branch? */
600    
601     assert(stopper == ')' || stopper == EOS);
602    
603     branches = subre(v, '|', LONGER, init, final);
604     NOERRN();
605     branch = branches;
606     firstbranch = 1;
607     do { /* a branch */
608     if (!firstbranch) {
609     /* need a place to hang it */
610     branch->right = subre(v, '|', LONGER, init, final);
611     NOERRN();
612     branch = branch->right;
613     }
614     firstbranch = 0;
615     left = newstate(v->nfa);
616     right = newstate(v->nfa);
617     NOERRN();
618     EMPTYARC(init, left);
619     EMPTYARC(right, final);
620     NOERRN();
621     branch->left = parsebranch(v, stopper, type, left, right, 0);
622     NOERRN();
623     branch->flags |= UP(branch->flags | branch->left->flags);
624     if ((branch->flags &~ branches->flags) != 0) /* new flags */
625     for (t = branches; t != branch; t = t->right)
626     t->flags |= branch->flags;
627     } while (EAT('|'));
628     assert(SEE(stopper) || SEE(EOS));
629    
630     if (!SEE(stopper)) {
631     assert(stopper == ')' && SEE(EOS));
632     ERR(REG_EPAREN);
633     }
634    
635     /* optimize out simple cases */
636     if (branch == branches) { /* only one branch */
637     assert(branch->right == NULL);
638     t = branch->left;
639     branch->left = NULL;
640     freesubre(v, branches);
641     branches = t;
642     } else if (!MESSY(branches->flags)) { /* no interesting innards */
643     freesubre(v, branches->left);
644     branches->left = NULL;
645     freesubre(v, branches->right);
646     branches->right = NULL;
647     branches->op = '=';
648     }
649    
650     return branches;
651     }
652    
653     /*
654     - parsebranch - parse one branch of an RE
655     * This mostly manages concatenation, working closely with parseqatom().
656     * Concatenated things are bundled up as much as possible, with separate
657     * ',' nodes introduced only when necessary due to substructure.
658     ^ static struct subre *parsebranch(struct vars *, int, int, struct state *,
659     ^ struct state *, int);
660     */
661     static struct subre *
662     parsebranch(v, stopper, type, left, right, partial)
663     struct vars *v;
664     int stopper; /* EOS or ')' */
665     int type; /* LACON (lookahead subRE) or PLAIN */
666     struct state *left; /* leftmost state */
667     struct state *right; /* rightmost state */
668     int partial; /* is this only part of a branch? */
669     {
670     struct state *lp; /* left end of current construct */
671     int seencontent; /* is there anything in this branch yet? */
672     struct subre *t;
673    
674     lp = left;
675     seencontent = 0;
676     t = subre(v, '=', 0, left, right); /* op '=' is tentative */
677     NOERRN();
678     while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) {
679     if (seencontent) { /* implicit concat operator */
680     lp = newstate(v->nfa);
681     NOERRN();
682     moveins(v->nfa, right, lp);
683     }
684     seencontent = 1;
685    
686     /* NB, recursion in parseqatom() may swallow rest of branch */
687     parseqatom(v, stopper, type, lp, right, t);
688     }
689    
690     if (!seencontent) { /* empty branch */
691     if (!partial)
692     NOTE(REG_UUNSPEC);
693     assert(lp == left);
694     EMPTYARC(left, right);
695     }
696    
697     return t;
698     }
699    
700     /*
701     - parseqatom - parse one quantified atom or constraint of an RE
702     * The bookkeeping near the end cooperates very closely with parsebranch();
703     * in particular, it contains a recursion that can involve parsing the rest
704     * of the branch, making this function's name somewhat inaccurate.
705     ^ static VOID parseqatom(struct vars *, int, int, struct state *,
706     ^ struct state *, struct subre *);
707     */
708     static VOID
709     parseqatom(v, stopper, type, lp, rp, top)
710     struct vars *v;
711     int stopper; /* EOS or ')' */
712     int type; /* LACON (lookahead subRE) or PLAIN */
713     struct state *lp; /* left state to hang it on */
714     struct state *rp; /* right state to hang it on */
715     struct subre *top; /* subtree top */
716     {
717     struct state *s; /* temporaries for new states */
718     struct state *s2;
719     # define ARCV(t, val) newarc(v->nfa, t, val, lp, rp)
720     int m, n;
721     struct subre *atom; /* atom's subtree */
722     struct subre *t;
723     int cap; /* capturing parens? */
724     int pos; /* positive lookahead? */
725     int subno; /* capturing-parens or backref number */
726     int atomtype;
727     int qprefer; /* quantifier short/long preference */
728     int f;
729     struct subre **atomp; /* where the pointer to atom is */
730    
731     /* initial bookkeeping */
732     atom = NULL;
733     assert(lp->nouts == 0); /* must string new code */
734     assert(rp->nins == 0); /* between lp and rp */
735     subno = 0; /* just to shut lint up */
736    
737     /* an atom or constraint... */
738     atomtype = v->nexttype;
739     switch (atomtype) {
740     /* first, constraints, which end by returning */
741     case '^':
742     ARCV('^', 1);
743     if (v->cflags&REG_NLANCH)
744     ARCV(BEHIND, v->nlcolor);
745     NEXT();
746     return;
747     break;
748     case '$':
749     ARCV('$', 1);
750     if (v->cflags&REG_NLANCH)
751     ARCV(AHEAD, v->nlcolor);
752     NEXT();
753     return;
754     break;
755     case SBEGIN:
756     ARCV('^', 1); /* BOL */
757     ARCV('^', 0); /* or BOS */
758     NEXT();
759     return;
760     break;
761     case SEND:
762     ARCV('$', 1); /* EOL */
763     ARCV('$', 0); /* or EOS */
764     NEXT();
765     return;
766     break;
767     case '<':
768     wordchrs(v); /* does NEXT() */
769     s = newstate(v->nfa);
770     NOERR();
771     nonword(v, BEHIND, lp, s);
772     word(v, AHEAD, s, rp);
773     return;
774     break;
775     case '>':
776     wordchrs(v); /* does NEXT() */
777     s = newstate(v->nfa);
778     NOERR();
779     word(v, BEHIND, lp, s);
780     nonword(v, AHEAD, s, rp);
781     return;
782     break;
783     case WBDRY:
784     wordchrs(v); /* does NEXT() */
785     s = newstate(v->nfa);
786     NOERR();
787     nonword(v, BEHIND, lp, s);
788     word(v, AHEAD, s, rp);
789     s = newstate(v->nfa);
790     NOERR();
791     word(v, BEHIND, lp, s);
792     nonword(v, AHEAD, s, rp);
793     return;
794     break;
795     case NWBDRY:
796     wordchrs(v); /* does NEXT() */
797     s = newstate(v->nfa);
798     NOERR();
799     word(v, BEHIND, lp, s);
800     word(v, AHEAD, s, rp);
801     s = newstate(v->nfa);
802     NOERR();
803     nonword(v, BEHIND, lp, s);
804     nonword(v, AHEAD, s, rp);
805     return;
806     break;
807     case LACON: /* lookahead constraint */
808     pos = v->nextvalue;
809     NEXT();
810     s = newstate(v->nfa);
811     s2 = newstate(v->nfa);
812     NOERR();
813     t = parse(v, ')', LACON, s, s2);
814     freesubre(v, t); /* internal structure irrelevant */
815     assert(SEE(')') || ISERR());
816     NEXT();
817     n = newlacon(v, s, s2, pos);
818     NOERR();
819     ARCV(LACON, n);
820     return;
821     break;
822     /* then errors, to get them out of the way */
823     case '*':
824     case '+':
825     case '?':
826     case '{':
827     ERR(REG_BADRPT);
828     return;
829     break;
830     default:
831     ERR(REG_ASSERT);
832     return;
833     break;
834     /* then plain characters, and minor variants on that theme */
835     case ')': /* unbalanced paren */
836     if ((v->cflags&REG_ADVANCED) != REG_EXTENDED) {
837     ERR(REG_EPAREN);
838     return;
839     }
840     /* legal in EREs due to specification botch */
841     NOTE(REG_UPBOTCH);
842     /* fallthrough into case PLAIN */
843     case PLAIN:
844     onechr(v, v->nextvalue, lp, rp);
845     okcolors(v->nfa, v->cm);
846     NOERR();
847     NEXT();
848     break;
849     case '[':
850     if (v->nextvalue == 1)
851     bracket(v, lp, rp);
852     else
853     cbracket(v, lp, rp);
854     assert(SEE(']') || ISERR());
855     NEXT();
856     break;
857     case '.':
858     rainbow(v->nfa, v->cm, PLAIN,
859     (v->cflags&REG_NLSTOP) ? v->nlcolor : COLORLESS,
860     lp, rp);
861     NEXT();
862     break;
863     /* and finally the ugly stuff */
864     case '(': /* value flags as capturing or non */
865     cap = (type == LACON) ? 0 : v->nextvalue;
866     if (cap) {
867     v->nsubexp++;
868     subno = v->nsubexp;
869     if ((size_t)subno >= v->nsubs)
870     moresubs(v, subno);
871     assert((size_t)subno < v->nsubs);
872     } else
873     atomtype = PLAIN; /* something that's not '(' */
874     NEXT();
875     /* need new endpoints because tree will contain pointers */
876     s = newstate(v->nfa);
877     s2 = newstate(v->nfa);
878     NOERR();
879     EMPTYARC(lp, s);
880     EMPTYARC(s2, rp);
881     NOERR();
882     atom = parse(v, ')', PLAIN, s, s2);
883     assert(SEE(')') || ISERR());
884     NEXT();
885     NOERR();
886     if (cap) {
887     v->subs[subno] = atom;
888     t = subre(v, '(', atom->flags|CAP, lp, rp);
889     NOERR();
890     t->subno = subno;
891     t->left = atom;
892     atom = t;
893     }
894     /* postpone everything else pending possible {0} */
895     break;
896     case BACKREF: /* the Feature From The Black Lagoon */
897     INSIST(type != LACON, REG_ESUBREG);
898     INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
899     INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);
900     NOERR();
901     assert(v->nextvalue > 0);
902     atom = subre(v, 'b', BACKR, lp, rp);
903     subno = v->nextvalue;
904     atom->subno = subno;
905     EMPTYARC(lp, rp); /* temporarily, so there's something */
906     NEXT();
907     break;
908     }
909    
910     /* ...and an atom may be followed by a quantifier */
911     switch (v->nexttype) {
912     case '*':
913     m = 0;
914     n = INFINITY;
915     qprefer = (v->nextvalue) ? LONGER : SHORTER;
916     NEXT();
917     break;
918     case '+':
919     m = 1;
920     n = INFINITY;
921     qprefer = (v->nextvalue) ? LONGER : SHORTER;
922     NEXT();
923     break;
924     case '?':
925     m = 0;
926     n = 1;
927     qprefer = (v->nextvalue) ? LONGER : SHORTER;
928     NEXT();
929     break;
930     case '{':
931     NEXT();
932     m = scannum(v);
933     if (EAT(',')) {
934     if (SEE(DIGIT))
935     n = scannum(v);
936     else
937     n = INFINITY;
938     if (m > n) {
939     ERR(REG_BADBR);
940     return;
941     }
942     /* {m,n} exercises preference, even if it's {m,m} */
943     qprefer = (v->nextvalue) ? LONGER : SHORTER;
944     } else {
945     n = m;
946     /* {m} passes operand's preference through */
947     qprefer = 0;
948     }
949     if (!SEE('}')) { /* catches errors too */
950     ERR(REG_BADBR);
951     return;
952     }
953     NEXT();
954     break;
955     default: /* no quantifier */
956     m = n = 1;
957     qprefer = 0;
958     break;
959     }
960    
961     /* annoying special case: {0} or {0,0} cancels everything */
962     if (m == 0 && n == 0) {
963     if (atom != NULL)
964     freesubre(v, atom);
965     if (atomtype == '(')
966     v->subs[subno] = NULL;
967     delsub(v->nfa, lp, rp);
968     EMPTYARC(lp, rp);
969     return;
970     }
971    
972     /* if not a messy case, avoid hard part */
973     assert(!MESSY(top->flags));
974     f = top->flags | qprefer | ((atom != NULL) ? atom->flags : 0);
975     if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) {
976     if (!(m == 1 && n == 1))
977     repeat(v, lp, rp, m, n);
978     if (atom != NULL)
979     freesubre(v, atom);
980     top->flags = f;
981     return;
982     }
983    
984     /*
985     * hard part: something messy
986     * That is, capturing parens, back reference, short/long clash, or
987     * an atom with substructure containing one of those.
988     */
989    
990     /* now we'll need a subre for the contents even if they're boring */
991     if (atom == NULL) {
992     atom = subre(v, '=', 0, lp, rp);
993     NOERR();
994     }
995    
996     /*
997     * prepare a general-purpose state skeleton
998     *
999     * ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp]
1000     * / /
1001     * [lp] ----> [s2] ----bypass---------------------
1002     *
1003     * where bypass is an empty, and prefix is some repetitions of atom
1004     */
1005     s = newstate(v->nfa); /* first, new endpoints for the atom */
1006     s2 = newstate(v->nfa);
1007     NOERR();
1008     moveouts(v->nfa, lp, s);
1009     moveins(v->nfa, rp, s2);
1010     NOERR();
1011     atom->begin = s;
1012     atom->end = s2;
1013     s = newstate(v->nfa); /* and spots for prefix and bypass */
1014     s2 = newstate(v->nfa);
1015     NOERR();
1016     EMPTYARC(lp, s);
1017     EMPTYARC(lp, s2);
1018     NOERR();
1019    
1020     /* break remaining subRE into x{...} and what follows */
1021     t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp);
1022     t->left = atom;
1023     atomp = &t->left;
1024     /* here we should recurse... but we must postpone that to the end */
1025    
1026     /* split top into prefix and remaining */
1027     assert(top->op == '=' && top->left == NULL && top->right == NULL);
1028     top->left = subre(v, '=', top->flags, top->begin, lp);
1029     top->op = '.';
1030     top->right = t;
1031    
1032     /* if it's a backref, now is the time to replicate the subNFA */
1033     if (atomtype == BACKREF) {
1034     assert(atom->begin->nouts == 1); /* just the EMPTY */
1035     delsub(v->nfa, atom->begin, atom->end);
1036     assert(v->subs[subno] != NULL);
1037     /* and here's why the recursion got postponed: it must */
1038     /* wait until the skeleton is filled in, because it may */
1039     /* hit a backref that wants to copy the filled-in skeleton */
1040     dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
1041     atom->begin, atom->end);
1042     NOERR();
1043     }
1044    
1045     /* it's quantifier time; first, turn x{0,...} into x{1,...}|empty */
1046     if (m == 0) {
1047     EMPTYARC(s2, atom->end); /* the bypass */
1048     assert(PREF(qprefer) != 0);
1049     f = COMBINE(qprefer, atom->flags);
1050     t = subre(v, '|', f, lp, atom->end);
1051     NOERR();
1052     t->left = atom;
1053     t->right = subre(v, '|', PREF(f), s2, atom->end);
1054     NOERR();
1055     t->right->left = subre(v, '=', 0, s2, atom->end);
1056     NOERR();
1057     *atomp = t;
1058     atomp = &t->left;
1059     m = 1;
1060     }
1061    
1062     /* deal with the rest of the quantifier */
1063     if (atomtype == BACKREF) {
1064     /* special case: backrefs have internal quantifiers */
1065     EMPTYARC(s, atom->begin); /* empty prefix */
1066     /* just stuff everything into atom */
1067     repeat(v, atom->begin, atom->end, m, n);
1068     atom->min = (short)m;
1069     atom->max = (short)n;
1070     atom->flags |= COMBINE(qprefer, atom->flags);
1071     } else if (m == 1 && n == 1) {
1072     /* no/vacuous quantifier: done */
1073     EMPTYARC(s, atom->begin); /* empty prefix */
1074     } else {
1075     /* turn x{m,n} into x{m-1,n-1}x, with capturing */
1076     /* parens in only second x */
1077     dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin);
1078     assert(m >= 1 && m != INFINITY && n >= 1);
1079     repeat(v, s, atom->begin, m-1, (n == INFINITY) ? n : n-1);
1080     f = COMBINE(qprefer, atom->flags);
1081     t = subre(v, '.', f, s, atom->end); /* prefix and atom */
1082     NOERR();
1083     t->left = subre(v, '=', PREF(f), s, atom->begin);
1084     NOERR();
1085     t->right = atom;
1086     *atomp = t;
1087     }
1088    
1089     /* and finally, look after that postponed recursion */
1090     t = top->right;
1091     if (!(SEE('|') || SEE(stopper) || SEE(EOS)))
1092     t->right = parsebranch(v, stopper, type, atom->end, rp, 1);
1093     else {
1094     EMPTYARC(atom->end, rp);
1095     t->right = subre(v, '=', 0, atom->end, rp);
1096     }
1097     assert(SEE('|') || SEE(stopper) || SEE(EOS));
1098     t->flags |= COMBINE(t->flags, t->right->flags);
1099     top->flags |= COMBINE(top->flags, t->flags);
1100     }
1101    
1102     /*
1103     - nonword - generate arcs for non-word-character ahead or behind
1104     ^ static VOID nonword(struct vars *, int, struct state *, struct state *);
1105     */
1106     static VOID
1107     nonword(v, dir, lp, rp)
1108     struct vars *v;
1109     int dir; /* AHEAD or BEHIND */
1110     struct state *lp;
1111     struct state *rp;
1112     {
1113     int anchor = (dir == AHEAD) ? '$' : '^';
1114    
1115     assert(dir == AHEAD || dir == BEHIND);
1116     newarc(v->nfa, anchor, 1, lp, rp);
1117     newarc(v->nfa, anchor, 0, lp, rp);
1118     colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp);
1119     /* (no need for special attention to \n) */
1120     }
1121    
1122     /*
1123     - word - generate arcs for word character ahead or behind
1124     ^ static VOID word(struct vars *, int, struct state *, struct state *);
1125     */
1126     static VOID
1127     word(v, dir, lp, rp)
1128     struct vars *v;
1129     int dir; /* AHEAD or BEHIND */
1130     struct state *lp;
1131     struct state *rp;
1132     {
1133     assert(dir == AHEAD || dir == BEHIND);
1134     cloneouts(v->nfa, v->wordchrs, lp, rp, dir);
1135     /* (no need for special attention to \n) */
1136     }
1137    
1138     /*
1139     - scannum - scan a number
1140     ^ static int scannum(struct vars *);
1141     */
1142     static int /* value, <= DUPMAX */
1143     scannum(v)
1144     struct vars *v;
1145     {
1146     int n = 0;
1147    
1148     while (SEE(DIGIT) && n < DUPMAX) {
1149     n = n*10 + v->nextvalue;
1150     NEXT();
1151     }
1152     if (SEE(DIGIT) || n > DUPMAX) {
1153     ERR(REG_BADBR);
1154     return 0;
1155     }
1156     return n;
1157     }
1158    
1159     /*
1160     - repeat - replicate subNFA for quantifiers
1161     * The duplication sequences used here are chosen carefully so that any
1162     * pointers starting out pointing into the subexpression end up pointing into
1163     * the last occurrence. (Note that it may not be strung between the same
1164     * left and right end states, however!) This used to be important for the
1165     * subRE tree, although the important bits are now handled by the in-line
1166     * code in parse(), and when this is called, it doesn't matter any more.
1167     ^ static VOID repeat(struct vars *, struct state *, struct state *, int, int);
1168     */
1169     static VOID
1170     repeat(v, lp, rp, m, n)
1171     struct vars *v;
1172     struct state *lp;
1173     struct state *rp;
1174     int m;
1175     int n;
1176     {
1177     # define SOME 2
1178     # define INF 3
1179     # define PAIR(x, y) ((x)*4 + (y))
1180     # define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) )
1181     CONST int rm = REDUCE(m);
1182     CONST int rn = REDUCE(n);
1183     struct state *s;
1184     struct state *s2;
1185    
1186     switch (PAIR(rm, rn)) {
1187     case PAIR(0, 0): /* empty string */
1188     delsub(v->nfa, lp, rp);
1189     EMPTYARC(lp, rp);
1190     break;
1191     case PAIR(0, 1): /* do as x| */
1192     EMPTYARC(lp, rp);
1193     break;
1194     case PAIR(0, SOME): /* do as x{1,n}| */
1195     repeat(v, lp, rp, 1, n);
1196     NOERR();
1197     EMPTYARC(lp, rp);
1198     break;
1199     case PAIR(0, INF): /* loop x around */
1200     s = newstate(v->nfa);
1201     NOERR();
1202     moveouts(v->nfa, lp, s);
1203     moveins(v->nfa, rp, s);
1204     EMPTYARC(lp, s);
1205     EMPTYARC(s, rp);
1206     break;
1207     case PAIR(1, 1): /* no action required */
1208     break;
1209     case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}|)x */
1210     s = newstate(v->nfa);
1211     NOERR();
1212     moveouts(v->nfa, lp, s);
1213     dupnfa(v->nfa, s, rp, lp, s);
1214     NOERR();
1215     repeat(v, lp, s, 1, n-1);
1216     NOERR();
1217     EMPTYARC(lp, s);
1218     break;
1219     case PAIR(1, INF): /* add loopback arc */
1220     s = newstate(v->nfa);
1221     s2 = newstate(v->nfa);
1222     NOERR();
1223     moveouts(v->nfa, lp, s);
1224     moveins(v->nfa, rp, s2);
1225     EMPTYARC(lp, s);
1226     EMPTYARC(s2, rp);
1227     EMPTYARC(s2, s);
1228     break;
1229     case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */
1230     s = newstate(v->nfa);
1231     NOERR();
1232     moveouts(v->nfa, lp, s);
1233     dupnfa(v->nfa, s, rp, lp, s);
1234     NOERR();
1235     repeat(v, lp, s, m-1, n-1);
1236     break;
1237     case PAIR(SOME, INF): /* do as x{m-1,}x */
1238     s = newstate(v->nfa);
1239     NOERR();
1240     moveouts(v->nfa, lp, s);
1241     dupnfa(v->nfa, s, rp, lp, s);
1242     NOERR();
1243     repeat(v, lp, s, m-1, n);
1244     break;
1245     default:
1246     ERR(REG_ASSERT);
1247     break;
1248     }
1249     }
1250    
1251     /*
1252     - bracket - handle non-complemented bracket expression
1253     * Also called from cbracket for complemented bracket expressions.
1254     ^ static VOID bracket(struct vars *, struct state *, struct state *);
1255     */
1256     static VOID
1257     bracket(v, lp, rp)
1258     struct vars *v;
1259     struct state *lp;
1260     struct state *rp;
1261     {
1262     assert(SEE('['));
1263     NEXT();
1264     while (!SEE(']') && !SEE(EOS))
1265     brackpart(v, lp, rp);
1266     assert(SEE(']') || ISERR());
1267     okcolors(v->nfa, v->cm);
1268     }
1269    
1270     /*
1271     - cbracket - handle complemented bracket expression
1272     * We do it by calling bracket() with dummy endpoints, and then complementing
1273     * the result. The alternative would be to invoke rainbow(), and then delete
1274     * arcs as the b.e. is seen... but that gets messy.
1275     ^ static VOID cbracket(struct vars *, struct state *, struct state *);
1276     */
1277     static VOID
1278     cbracket(v, lp, rp)
1279     struct vars *v;
1280     struct state *lp;
1281     struct state *rp;
1282     {
1283     struct state *left = newstate(v->nfa);
1284     struct state *right = newstate(v->nfa);
1285     struct state *s;
1286     struct arc *a; /* arc from lp */
1287     struct arc *ba; /* arc from left, from bracket() */
1288     struct arc *pa; /* MCCE-prototype arc */
1289     color co;
1290     chr *p;
1291     int i;
1292    
1293     NOERR();
1294     bracket(v, left, right);
1295     if (v->cflags&REG_NLSTOP)
1296     newarc(v->nfa, PLAIN, v->nlcolor, left, right);
1297     NOERR();
1298    
1299     assert(lp->nouts == 0); /* all outarcs will be ours */
1300    
1301     /* easy part of complementing */
1302     colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
1303     NOERR();
1304     if (v->mcces == NULL) { /* no MCCEs -- we're done */
1305     dropstate(v->nfa, left);
1306     assert(right->nins == 0);
1307     freestate(v->nfa, right);
1308     return;
1309     }
1310    
1311     /* but complementing gets messy in the presence of MCCEs... */
1312     NOTE(REG_ULOCALE);
1313     for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) {
1314     co = GETCOLOR(v->cm, *p);
1315     a = findarc(lp, PLAIN, co);
1316     ba = findarc(left, PLAIN, co);
1317     if (ba == NULL) {
1318     assert(a != NULL);
1319     freearc(v->nfa, a);
1320     } else {
1321     assert(a == NULL);
1322     }
1323     s = newstate(v->nfa);
1324     NOERR();
1325     newarc(v->nfa, PLAIN, co, lp, s);
1326     NOERR();
1327     pa = findarc(v->mccepbegin, PLAIN, co);
1328     assert(pa != NULL);
1329     if (ba == NULL) { /* easy case, need all of them */
1330     cloneouts(v->nfa, pa->to, s, rp, PLAIN);
1331     newarc(v->nfa, '$', 1, s, rp);
1332     newarc(v->nfa, '$', 0, s, rp);
1333     colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp);
1334     } else { /* must be selective */
1335     if (findarc(ba->to, '$', 1) == NULL) {
1336     newarc(v->nfa, '$', 1, s, rp);
1337     newarc(v->nfa, '$', 0, s, rp);
1338     colorcomplement(v->nfa, v->cm, AHEAD, pa->to,
1339     s, rp);
1340     }
1341     for (pa = pa->to->outs; pa != NULL; pa = pa->outchain)
1342     if (findarc(ba->to, PLAIN, pa->co) == NULL)
1343     newarc(v->nfa, PLAIN, pa->co, s, rp);
1344     if (s->nouts == 0) /* limit of selectivity: none */
1345     dropstate(v->nfa, s); /* frees arc too */
1346     }
1347     NOERR();
1348     }
1349    
1350     delsub(v->nfa, left, right);
1351     assert(left->nouts == 0);
1352     freestate(v->nfa, left);
1353     assert(right->nins == 0);
1354     freestate(v->nfa, right);
1355     }
1356    
1357     /*
1358     - brackpart - handle one item (or range) within a bracket expression
1359     ^ static VOID brackpart(struct vars *, struct state *, struct state *);
1360     */
1361     static VOID
1362     brackpart(v, lp, rp)
1363     struct vars *v;
1364     struct state *lp;
1365     struct state *rp;
1366     {
1367     celt startc;
1368     celt endc;
1369     struct cvec *cv;
1370     chr *startp;
1371     chr *endp;
1372     chr c[1];
1373    
1374     /* parse something, get rid of special cases, take shortcuts */
1375     switch (v->nexttype) {
1376     case RANGE: /* a-b-c or other botch */
1377     ERR(REG_ERANGE);
1378     return;
1379     break;
1380     case PLAIN:
1381     c[0] = v->nextvalue;
1382     NEXT();
1383     /* shortcut for ordinary chr (not range, not MCCE leader) */
1384     if (!SEE(RANGE) && !ISCELEADER(v, c[0])) {
1385     onechr(v, c[0], lp, rp);
1386     return;
1387     }
1388     startc = element(v, c, c+1);
1389     NOERR();
1390     break;
1391     case COLLEL:
1392     startp = v->now;
1393     endp = scanplain(v);
1394     INSIST(startp < endp, REG_ECOLLATE);
1395     NOERR();
1396     startc = element(v, startp, endp);
1397     NOERR();
1398     break;
1399     case ECLASS:
1400     startp = v->now;
1401     endp = scanplain(v);
1402     INSIST(startp < endp, REG_ECOLLATE);
1403     NOERR();
1404     startc = element(v, startp, endp);
1405     NOERR();
1406     cv = eclass(v, startc, (v->cflags&REG_ICASE));
1407     NOERR();
1408     dovec(v, cv, lp, rp);
1409     return;
1410     break;
1411     case CCLASS:
1412     startp = v->now;
1413     endp = scanplain(v);
1414     INSIST(startp < endp, REG_ECTYPE);
1415     NOERR();
1416     cv = cclass(v, startp, endp, (v->cflags&REG_ICASE));
1417     NOERR();
1418     dovec(v, cv, lp, rp);
1419     return;
1420     break;
1421     default:
1422     ERR(REG_ASSERT);
1423     return;
1424     break;
1425     }
1426    
1427     if (SEE(RANGE)) {
1428     NEXT();
1429     switch (v->nexttype) {
1430     case PLAIN:
1431     case RANGE:
1432     c[0] = v->nextvalue;
1433     NEXT();
1434     endc = element(v, c, c+1);
1435     NOERR();
1436     break;
1437     case COLLEL:
1438     startp = v->now;
1439     endp = scanplain(v);
1440     INSIST(startp < endp, REG_ECOLLATE);
1441     NOERR();
1442     endc = element(v, startp, endp);
1443     NOERR();
1444     break;
1445     default:
1446     ERR(REG_ERANGE);
1447     return;
1448     break;
1449     }
1450     } else
1451     endc = startc;
1452    
1453     /*
1454     * Ranges are unportable. Actually, standard C does
1455     * guarantee that digits are contiguous, but making
1456     * that an exception is just too complicated.
1457     */
1458     if (startc != endc)
1459     NOTE(REG_UUNPORT);
1460     cv = range(v, startc, endc, (v->cflags&REG_ICASE));
1461     NOERR();
1462     dovec(v, cv, lp, rp);
1463     }
1464    
1465     /*
1466     - scanplain - scan PLAIN contents of [. etc.
1467     * Certain bits of trickery in lex.c know that this code does not try
1468     * to look past the final bracket of the [. etc.
1469     ^ static chr *scanplain(struct vars *);
1470     */
1471     static chr * /* just after end of sequence */
1472     scanplain(v)
1473     struct vars *v;
1474     {
1475     chr *endp;
1476    
1477     assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS));
1478     NEXT();
1479    
1480     endp = v->now;
1481     while (SEE(PLAIN)) {
1482     endp = v->now;
1483     NEXT();
1484     }
1485    
1486     assert(SEE(END) || ISERR());
1487     NEXT();
1488    
1489     return endp;
1490     }
1491    
1492     /*
1493     - leaders - process a cvec of collating elements to also include leaders
1494     * Also gives all characters involved their own colors, which is almost
1495     * certainly necessary, and sets up little disconnected subNFA.
1496     ^ static VOID leaders(struct vars *, struct cvec *);
1497     */
1498     static VOID
1499     leaders(v, cv)
1500     struct vars *v;
1501     struct cvec *cv;
1502     {
1503     int mcce;
1504     chr *p;
1505     chr leader;
1506     struct state *s;
1507     struct arc *a;
1508    
1509     v->mccepbegin = newstate(v->nfa);
1510     v->mccepend = newstate(v->nfa);
1511     NOERR();
1512    
1513     for (mcce = 0; mcce < cv->nmcces; mcce++) {
1514     p = cv->mcces[mcce];
1515     leader = *p;
1516     if (!haschr(cv, leader)) {
1517     addchr(cv, leader);
1518     s = newstate(v->nfa);
1519     newarc(v->nfa, PLAIN, subcolor(v->cm, leader),
1520     v->mccepbegin, s);
1521     okcolors(v->nfa, v->cm);
1522     } else {
1523     a = findarc(v->mccepbegin, PLAIN,
1524     GETCOLOR(v->cm, leader));
1525     assert(a != NULL);
1526     s = a->to;
1527     assert(s != v->mccepend);
1528     }
1529     p++;
1530     assert(*p != 0 && *(p+1) == 0); /* only 2-char MCCEs for now */
1531     newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend);
1532     okcolors(v->nfa, v->cm);
1533     }
1534     }
1535    
1536     /*
1537     - onechr - fill in arcs for a plain character, and possible case complements
1538     * This is mostly a shortcut for efficient handling of the common case.
1539     ^ static VOID onechr(struct vars *, pchr, struct state *, struct state *);
1540     */
1541     static VOID
1542     onechr(v, c, lp, rp)
1543     struct vars *v;
1544     pchr c;
1545     struct state *lp;
1546     struct state *rp;
1547     {
1548     if (!(v->cflags&REG_ICASE)) {
1549     newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp);
1550     return;
1551     }
1552    
1553     /* rats, need general case anyway... */
1554     dovec(v, allcases(v, c), lp, rp);
1555     }
1556    
1557     /*
1558     - dovec - fill in arcs for each element of a cvec
1559     * This one has to handle the messy cases, like MCCEs and MCCE leaders.
1560     ^ static VOID dovec(struct vars *, struct cvec *, struct state *,
1561     ^ struct state *);
1562     */
1563     static VOID
1564     dovec(v, cv, lp, rp)
1565     struct vars *v;
1566     struct cvec *cv;
1567     struct state *lp;
1568     struct state *rp;
1569     {
1570     chr ch, from, to;
1571     celt ce;
1572     chr *p;
1573     int i;
1574     color co;
1575     struct cvec *leads;
1576     struct arc *a;
1577     struct arc *pa; /* arc in prototype */
1578     struct state *s;
1579     struct state *ps; /* state in prototype */
1580    
1581     /* need a place to store leaders, if any */
1582     if (nmcces(v) > 0) {
1583     assert(v->mcces != NULL);
1584     if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) {
1585     if (v->cv2 != NULL)
1586     free(v->cv2);
1587     v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces);
1588     NOERR();
1589     leads = v->cv2;
1590     } else
1591     leads = clearcvec(v->cv2);
1592     } else
1593     leads = NULL;
1594    
1595     /* first, get the ordinary characters out of the way */
1596     for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) {
1597     ch = *p;
1598     if (!ISCELEADER(v, ch))
1599     newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
1600     else {
1601     assert(singleton(v->cm, ch));
1602     assert(leads != NULL);
1603     if (!haschr(leads, ch))
1604     addchr(leads, ch);
1605     }
1606     }
1607    
1608     /* and the ranges */
1609     for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) {
1610     from = *p;
1611     to = *(p+1);
1612     while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) {
1613     if (from < ce)
1614     subrange(v, from, ce - 1, lp, rp);
1615     assert(singleton(v->cm, ce));
1616     assert(leads != NULL);
1617     if (!haschr(leads, ce))
1618     addchr(leads, ce);
1619     from = ce + 1;
1620     }
1621     if (from <= to)
1622     subrange(v, from, to, lp, rp);
1623     }
1624    
1625     if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0)
1626     return;
1627    
1628     /* deal with the MCCE leaders */
1629     NOTE(REG_ULOCALE);
1630     for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) {
1631     co = GETCOLOR(v->cm, *p);
1632     a = findarc(lp, PLAIN, co);
1633     if (a != NULL)
1634     s = a->to;
1635     else {
1636     s = newstate(v->nfa);
1637     NOERR();
1638     newarc(v->nfa, PLAIN, co, lp, s);
1639     NOERR();
1640     }
1641     pa = findarc(v->mccepbegin, PLAIN, co);
1642     assert(pa != NULL);
1643     ps = pa->to;
1644     newarc(v->nfa, '$', 1, s, rp);
1645     newarc(v->nfa, '$', 0, s, rp);
1646     colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp);
1647     NOERR();
1648     }
1649    
1650     /* and the MCCEs */
1651     for (i = 0; i < cv->nmcces; i++) {
1652     p = cv->mcces[i];
1653     assert(singleton(v->cm, *p));
1654     if (!singleton(v->cm, *p)) {
1655     ERR(REG_ASSERT);
1656     return;
1657     }
1658     ch = *p++;
1659     co = GETCOLOR(v->cm, ch);
1660     a = findarc(lp, PLAIN, co);
1661     if (a != NULL)
1662     s = a->to;
1663     else {
1664     s = newstate(v->nfa);
1665     NOERR();
1666     newarc(v->nfa, PLAIN, co, lp, s);
1667     NOERR();
1668     }
1669     assert(*p != 0); /* at least two chars */
1670     assert(singleton(v->cm, *p));
1671     ch = *p++;
1672     co = GETCOLOR(v->cm, ch);
1673     assert(*p == 0); /* and only two, for now */
1674     newarc(v->nfa, PLAIN, co, s, rp);
1675     NOERR();
1676     }
1677     }
1678    
1679     /*
1680     - nextleader - find next MCCE leader within range
1681     ^ static celt nextleader(struct vars *, pchr, pchr);
1682     */
1683     static celt /* NOCELT means none */
1684     nextleader(v, from, to)
1685     struct vars *v;
1686     pchr from;
1687     pchr to;
1688     {
1689     int i;
1690     chr *p;
1691     chr ch;
1692     celt it = NOCELT;
1693    
1694     if (v->mcces == NULL)
1695     return it;
1696    
1697     for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) {
1698     ch = *p;
1699     if (from <= ch && ch <= to)
1700     if (it == NOCELT || ch < it)
1701     it = ch;
1702     }
1703     return it;
1704     }
1705    
1706     /*
1707     - wordchrs - set up word-chr list for word-boundary stuff, if needed
1708     * The list is kept as a bunch of arcs between two dummy states; it's
1709     * disposed of by the unreachable-states sweep in NFA optimization.
1710     * Does NEXT(). Must not be called from any unusual lexical context.
1711     * This should be reconciled with the \w etc. handling in lex.c, and
1712     * should be cleaned up to reduce dependencies on input scanning.
1713     ^ static VOID wordchrs(struct vars *);
1714     */
1715     static VOID
1716     wordchrs(v)
1717     struct vars *v;
1718     {
1719     struct state *left;
1720     struct state *right;
1721    
1722     if (v->wordchrs != NULL) {
1723     NEXT(); /* for consistency */
1724     return;
1725     }
1726    
1727     left = newstate(v->nfa);
1728     right = newstate(v->nfa);
1729     NOERR();
1730     /* fine point: implemented with [::], and lexer will set REG_ULOCALE */
1731     lexword(v);
1732     NEXT();
1733     assert(v->savenow != NULL && SEE('['));
1734     bracket(v, left, right);
1735     assert((v->savenow != NULL && SEE(']')) || ISERR());
1736     NEXT();
1737     NOERR();
1738     v->wordchrs = left;
1739     }
1740    
1741     /*
1742     - subre - allocate a subre
1743     ^ static struct subre *subre(struct vars *, int, int, struct state *,
1744     ^ struct state *);
1745     */
1746     static struct subre *
1747     subre(v, op, flags, begin, end)
1748     struct vars *v;
1749     int op;
1750     int flags;
1751     struct state *begin;
1752     struct state *end;
1753     {
1754     struct subre *ret;
1755    
1756     ret = v->treefree;
1757     if (ret != NULL)
1758     v->treefree = ret->left;
1759     else {
1760     ret = (struct subre *)MALLOC(sizeof(struct subre));
1761     if (ret == NULL) {
1762     ERR(REG_ESPACE);
1763     return NULL;
1764     }
1765     ret->chain = v->treechain;
1766     v->treechain = ret;
1767     }
1768    
1769     assert(strchr("|.b(=", op) != NULL);
1770    
1771     ret->op = op;
1772     ret->flags = flags;
1773     ret->retry = 0;
1774     ret->subno = 0;
1775     ret->min = ret->max = 1;
1776     ret->left = NULL;
1777     ret->right = NULL;
1778     ret->begin = begin;
1779     ret->end = end;
1780     ZAPCNFA(ret->cnfa);
1781    
1782     return ret;
1783     }
1784    
1785     /*
1786     - freesubre - free a subRE subtree
1787     ^ static VOID freesubre(struct vars *, struct subre *);
1788     */
1789     static VOID
1790     freesubre(v, sr)
1791     struct vars *v; /* might be NULL */
1792     struct subre *sr;
1793     {
1794     if (sr == NULL)
1795     return;
1796    
1797     if (sr->left != NULL)
1798     freesubre(v, sr->left);
1799     if (sr->right != NULL)
1800     freesubre(v, sr->right);
1801    
1802     freesrnode(v, sr);
1803     }
1804    
1805     /*
1806     - freesrnode - free one node in a subRE subtree
1807     ^ static VOID freesrnode(struct vars *, struct subre *);
1808     */
1809     static VOID
1810     freesrnode(v, sr)
1811     struct vars *v; /* might be NULL */
1812     struct subre *sr;
1813     {
1814     if (sr == NULL)
1815     return;
1816    
1817     if (!NULLCNFA(sr->cnfa))
1818     freecnfa(&sr->cnfa);
1819     sr->flags = 0;
1820    
1821     if (v != NULL) {
1822     sr->left = v->treefree;
1823     v->treefree = sr;
1824     } else
1825     FREE(sr);
1826     }
1827    
1828     /*
1829     - optst - optimize a subRE subtree
1830     ^ static VOID optst(struct vars *, struct subre *);
1831     */
1832     static VOID
1833     optst(v, t)
1834     struct vars *v;
1835     struct subre *t;
1836     {
1837     if (t == NULL)
1838     return;
1839    
1840     /* recurse through children */
1841     if (t->left != NULL)
1842     optst(v, t->left);
1843     if (t->right != NULL)
1844     optst(v, t->right);
1845     }
1846    
1847     /*
1848     - numst - number tree nodes (assigning retry indexes)
1849     ^ static int numst(struct subre *, int);
1850     */
1851     static int /* next number */
1852     numst(t, start)
1853     struct subre *t;
1854     int start; /* starting point for subtree numbers */
1855     {
1856     int i;
1857    
1858     assert(t != NULL);
1859    
1860     i = start;
1861     t->retry = (short)i++;
1862     if (t->left != NULL)
1863     i = numst(t->left, i);
1864     if (t->right != NULL)
1865     i = numst(t->right, i);
1866     return i;
1867     }
1868    
1869     /*
1870     - markst - mark tree nodes as INUSE
1871     ^ static VOID markst(struct subre *);
1872     */
1873     static VOID
1874     markst(t)
1875     struct subre *t;
1876     {
1877     assert(t != NULL);
1878    
1879     t->flags |= INUSE;
1880     if (t->left != NULL)
1881     markst(t->left);
1882     if (t->right != NULL)
1883     markst(t->right);
1884     }
1885    
1886     /*
1887     - cleanst - free any tree nodes not marked INUSE
1888     ^ static VOID cleanst(struct vars *);
1889     */
1890     static VOID
1891     cleanst(v)
1892     struct vars *v;
1893     {
1894     struct subre *t;
1895     struct subre *next;
1896    
1897     for (t = v->treechain; t != NULL; t = next) {
1898     next = t->chain;
1899     if (!(t->flags&INUSE))
1900     FREE(t);
1901     }
1902     v->treechain = NULL;
1903     v->treefree = NULL; /* just on general principles */
1904     }
1905    
1906     /*
1907     - nfatree - turn a subRE subtree into a tree of compacted NFAs
1908     ^ static long nfatree(struct vars *, struct subre *, FILE *);
1909     */
1910     static long /* optimize results from top node */
1911     nfatree(v, t, f)
1912     struct vars *v;
1913     struct subre *t;
1914     FILE *f; /* for debug output */
1915     {
1916     assert(t != NULL && t->begin != NULL);
1917    
1918     if (t->left != NULL)
1919     (DISCARD)nfatree(v, t->left, f);
1920     if (t->right != NULL)
1921     (DISCARD)nfatree(v, t->right, f);
1922    
1923     return nfanode(v, t, f);
1924     }
1925    
1926     /*
1927     - nfanode - do one NFA for nfatree
1928     ^ static long nfanode(struct vars *, struct subre *, FILE *);
1929     */
1930     static long /* optimize results */
1931     nfanode(v, t, f)
1932     struct vars *v;
1933     struct subre *t;
1934     FILE *f; /* for debug output */
1935     {
1936     struct nfa *nfa;
1937     long ret = 0;
1938     char idbuf[50];
1939    
1940     assert(t->begin != NULL);
1941    
1942     if (f != NULL)
1943     fprintf(f, "\n\n\n========= TREE NODE %s ==========\n",
1944     stid(t, idbuf, sizeof(idbuf)));
1945     nfa = newnfa(v, v->cm, v->nfa);
1946     NOERRZ();
1947     dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final);
1948     if (!ISERR()) {
1949     specialcolors(nfa);
1950     ret = optimize(nfa, f);
1951     }
1952     if (!ISERR())
1953     compact(nfa, &t->cnfa);
1954    
1955     freenfa(nfa);
1956     return ret;
1957     }
1958    
1959     /*
1960     - newlacon - allocate a lookahead-constraint subRE
1961     ^ static int newlacon(struct vars *, struct state *, struct state *, int);
1962     */
1963     static int /* lacon number */
1964     newlacon(v, begin, end, pos)
1965     struct vars *v;
1966     struct state *begin;
1967     struct state *end;
1968     int pos;
1969     {
1970     int n;
1971     struct subre *sub;
1972    
1973     if (v->nlacons == 0) {
1974     v->lacons = (struct subre *)MALLOC(2 * sizeof(struct subre));
1975     n = 1; /* skip 0th */
1976     v->nlacons = 2;
1977     } else {
1978     v->lacons = (struct subre *)REALLOC(v->lacons,
1979     (v->nlacons+1)*sizeof(struct subre));
1980     n = v->nlacons++;
1981     }
1982     if (v->lacons == NULL) {
1983     ERR(REG_ESPACE);
1984     return 0;
1985     }
1986     sub = &v->lacons[n];
1987     sub->begin = begin;
1988     sub->end = end;
1989     sub->subno = pos;
1990     ZAPCNFA(sub->cnfa);
1991     return n;
1992     }
1993    
1994     /*
1995     - freelacons - free lookahead-constraint subRE vector
1996     ^ static VOID freelacons(struct subre *, int);
1997     */
1998     static VOID
1999     freelacons(subs, n)
2000     struct subre *subs;
2001     int n;
2002     {
2003     struct subre *sub;
2004     int i;
2005    
2006     assert(n > 0);
2007     for (sub = subs + 1, i = n - 1; i > 0; sub++, i--) /* no 0th */
2008     if (!NULLCNFA(sub->cnfa))
2009     freecnfa(&sub->cnfa);
2010     FREE(subs);
2011     }
2012    
2013     /*
2014     - rfree - free a whole RE (insides of regfree)
2015     ^ static VOID rfree(regex_t *);
2016     */
2017     static VOID
2018     rfree(re)
2019     regex_t *re;
2020     {
2021     struct guts *g;
2022    
2023     if (re == NULL || re->re_magic != REMAGIC)
2024     return;
2025    
2026     re->re_magic = 0; /* invalidate RE */
2027     g = (struct guts *)re->re_guts;
2028     re->re_guts = NULL;
2029     re->re_fns = NULL;
2030     g->magic = 0;
2031     freecm(&g->cmap);
2032     if (g->tree != NULL)
2033     freesubre((struct vars *)NULL, g->tree);
2034     if (g->lacons != NULL)
2035     freelacons(g->lacons, g->nlacons);
2036     if (!NULLCNFA(g->search))
2037     freecnfa(&g->search);
2038     FREE(g);
2039     }
2040    
2041     /*
2042     - dump - dump an RE in human-readable form
2043     ^ static VOID dump(regex_t *, FILE *);
2044     */
2045     static VOID
2046     dump(re, f)
2047     regex_t *re;
2048     FILE *f;
2049     {
2050     #ifdef REG_DEBUG
2051     struct guts *g;
2052     int i;
2053    
2054     if (re->re_magic != REMAGIC)
2055     fprintf(f, "bad magic number (0x%x not 0x%x)\n", re->re_magic,
2056     REMAGIC);
2057     if (re->re_guts == NULL) {
2058     fprintf(f, "NULL guts!!!\n");
2059     return;
2060     }
2061     g = (struct guts *)re->re_guts;
2062     if (g->magic != GUTSMAGIC)
2063     fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", g->magic,
2064     GUTSMAGIC);
2065    
2066     fprintf(f, "\n\n\n========= DUMP ==========\n");
2067     fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n",
2068     re->re_nsub, re->re_info, re->re_csize, g->ntree);
2069    
2070     dumpcolors(&g->cmap, f);
2071     if (!NULLCNFA(g->search)) {
2072     printf("\nsearch:\n");
2073     dumpcnfa(&g->search, f);
2074     }
2075     for (i = 1; i < g->nlacons; i++) {
2076     fprintf(f, "\nla%d (%s):\n", i,
2077     (g->lacons[i].subno) ? "positive" : "negative");
2078     dumpcnfa(&g->lacons[i].cnfa, f);
2079     }
2080     fprintf(f, "\n");
2081     dumpst(g->tree, f, 0);
2082     #endif
2083     }
2084    
2085     /*
2086     - dumpst - dump a subRE tree
2087     ^ static VOID dumpst(struct subre *, FILE *, int);
2088     */
2089     static VOID
2090     dumpst(t, f, nfapresent)
2091     struct subre *t;
2092     FILE *f;
2093     int nfapresent; /* is the original NFA still around? */
2094     {
2095     if (t == NULL)
2096     fprintf(f, "null tree\n");
2097     else
2098     stdump(t, f, nfapresent);
2099     fflush(f);
2100     }
2101    
2102     /*
2103     - stdump - recursive guts of dumpst
2104     ^ static VOID stdump(struct subre *, FILE *, int);
2105     */
2106     static VOID
2107     stdump(t, f, nfapresent)
2108     struct subre *t;
2109     FILE *f;
2110     int nfapresent; /* is the original NFA still around? */
2111     {
2112     char idbuf[50];
2113    
2114     fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op);
2115     if (t->flags&LONGER)
2116     fprintf(f, " longest");
2117     if (t->flags&SHORTER)
2118     fprintf(f, " shortest");
2119     if (t->flags&MIXED)
2120     fprintf(f, " hasmixed");
2121     if (t->flags&CAP)
2122     fprintf(f, " hascapture");
2123     if (t->flags&BACKR)
2124     fprintf(f, " hasbackref");
2125     if (!(t->flags&INUSE))
2126     fprintf(f, " UNUSED");
2127     if (t->subno != 0)
2128     fprintf(f, " (#%d)", t->subno);
2129     if (t->min != 1 || t->max != 1) {
2130     fprintf(f, " {%d,", t->min);
2131     if (t->max != INFINITY)
2132     fprintf(f, "%d", t->max);
2133     fprintf(f, "}");
2134     }
2135     if (nfapresent)
2136     fprintf(f, " %ld-%ld", (long)t->begin->no, (long)t->end->no);
2137     if (t->left != NULL)
2138     fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf)));
2139     if (t->right != NULL)
2140     fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf)));
2141     if (!NULLCNFA(t->cnfa)) {
2142     fprintf(f, "\n");
2143     dumpcnfa(&t->cnfa, f);
2144     fprintf(f, "\n");
2145     }
2146     if (t->left != NULL)
2147     stdump(t->left, f, nfapresent);
2148     if (t->right != NULL)
2149     stdump(t->right, f, nfapresent);
2150     }
2151    
2152     /*
2153     - stid - identify a subtree node for dumping
2154     ^ static char *stid(struct subre *, char *, size_t);
2155     */
2156     static char * /* points to buf or constant string */
2157     stid(t, buf, bufsize)
2158     struct subre *t;
2159     char *buf;
2160     size_t bufsize;
2161     {
2162     /* big enough for hex int or decimal t->retry? */
2163     if (bufsize < sizeof(int)*2 + 3 || bufsize < sizeof(t->retry)*3 + 1)
2164     return "unable";
2165     if (t->retry != 0)
2166     sprintf(buf, "%d", t->retry);
2167     else
2168     sprintf(buf, "0x%x", (int)t); /* may lose bits, that's okay */
2169     return buf;
2170     }
2171    
2172     #include "regc_lex.c"
2173     #include "regc_color.c"
2174     #include "regc_nfa.c"
2175     #include "regc_cvec.c"
2176     #include "regc_locale.c"
2177    
2178     /* End of regcomp.c */

Properties

Name Value
svn:eol-style native
svn:keywords Header

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25