/[dtapublic]/projs/trunk/shared_source/c_tcl_base_7_5_w_mods/regexec.c
ViewVC logotype

Annotation of /projs/trunk/shared_source/c_tcl_base_7_5_w_mods/regexec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 64 - (hide annotations) (download)
Sun Oct 30 04:21:11 2016 UTC (7 years, 8 months ago) by dashley
File MIME type: text/plain
File size: 29663 byte(s)
Adjust line endings to Windows style.
Set properties to expand the "Header" keyword.
Change header and footer.
1 dashley 64 /* $Header$ */
2 dashley 25
3     /*
4     * re_*exec and friends - match REs
5     *
6     * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
7     *
8     * Development of this software was funded, in part, by Cray Research Inc.,
9     * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
10     * Corporation, none of whom are responsible for the results. The author
11     * thanks all of them.
12     *
13     * Redistribution and use in source and binary forms -- with or without
14     * modification -- are permitted for any purpose, provided that
15     * redistributions in source form retain this entire copyright notice and
16     * indicate the origin and nature of any modifications.
17     *
18     * I'd appreciate being given credit for this package in the documentation
19     * of software which uses it, but that is not a requirement.
20     *
21     * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
22     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
23     * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
24     * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25     * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26     * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
27     * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28     * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29     * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
30     * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31     *
32     */
33    
34     #include "regguts.h"
35    
36    
37    
38     /* lazy-DFA representation */
39     struct arcp { /* "pointer" to an outarc */
40     struct sset *ss;
41     color co;
42     };
43    
44     struct sset { /* state set */
45     unsigned *states; /* pointer to bitvector */
46     unsigned hash; /* hash of bitvector */
47     # define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
48     # define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
49     memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
50     int flags;
51     # define STARTER 01 /* the initial state set */
52     # define POSTSTATE 02 /* includes the goal state */
53     # define LOCKED 04 /* locked in cache */
54     # define NOPROGRESS 010 /* zero-progress state set */
55     struct arcp ins; /* chain of inarcs pointing here */
56     chr *lastseen; /* last entered on arrival here */
57     struct sset **outs; /* outarc vector indexed by color */
58     struct arcp *inchain; /* chain-pointer vector for outarcs */
59     };
60    
61     struct dfa {
62     int nssets; /* size of cache */
63     int nssused; /* how many entries occupied yet */
64     int nstates; /* number of states */
65     int ncolors; /* length of outarc and inchain vectors */
66     int wordsper; /* length of state-set bitvectors */
67     struct sset *ssets; /* state-set cache */
68     unsigned *statesarea; /* bitvector storage */
69     unsigned *work; /* pointer to work area within statesarea */
70     struct sset **outsarea; /* outarc-vector storage */
71     struct arcp *incarea; /* inchain storage */
72     struct cnfa *cnfa;
73     struct colormap *cm;
74     chr *lastpost; /* location of last cache-flushed success */
75     chr *lastnopr; /* location of last cache-flushed NOPROGRESS */
76     struct sset *search; /* replacement-search-pointer memory */
77     int cptsmalloced; /* were the areas individually malloced? */
78     char *mallocarea; /* self, or master malloced area, or NULL */
79     };
80    
81     #define WORK 1 /* number of work bitvectors needed */
82    
83     /* setup for non-malloc allocation for small cases */
84     #define FEWSTATES 20 /* must be less than UBITS */
85     #define FEWCOLORS 15
86     struct smalldfa {
87     struct dfa dfa;
88     struct sset ssets[FEWSTATES*2];
89     unsigned statesarea[FEWSTATES*2 + WORK];
90     struct sset *outsarea[FEWSTATES*2 * FEWCOLORS];
91     struct arcp incarea[FEWSTATES*2 * FEWCOLORS];
92     };
93     #define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */
94    
95    
96    
97     /* internal variables, bundled for easy passing around */
98     struct vars {
99     regex_t *re;
100     struct guts *g;
101     int eflags; /* copies of arguments */
102     size_t nmatch;
103     regmatch_t *pmatch;
104     rm_detail_t *details;
105     chr *start; /* start of string */
106     chr *stop; /* just past end of string */
107     int err; /* error code if any (0 none) */
108     regoff_t *mem; /* memory vector for backtracking */
109     struct smalldfa dfa1;
110     struct smalldfa dfa2;
111     };
112     #define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
113     #define ISERR() VISERR(v)
114     #define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e)))
115     #define ERR(e) VERR(v, e) /* record an error */
116     #define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */
117     #define OFF(p) ((p) - v->start)
118     #define LOFF(p) ((long)OFF(p))
119    
120    
121    
122     /*
123     * forward declarations
124     */
125     /* =====^!^===== begin forwards =====^!^===== */
126     /* automatically gathered by fwd; do not hand-edit */
127     /* === regexec.c === */
128     int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
129     static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
130     static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
131     static int cfindloop _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **));
132     static VOID zapsubs _ANSI_ARGS_((regmatch_t *, size_t));
133     static VOID zapmem _ANSI_ARGS_((struct vars *, struct subre *));
134     static VOID subset _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
135     static int dissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
136     static int condissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
137     static int altdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
138     static int cdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
139     static int ccondissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
140     static int crevdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
141     static int cbrdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
142     static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
143     /* === rege_dfa.c === */
144     static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, int *));
145     static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *));
146     static chr *lastcold _ANSI_ARGS_((struct vars *, struct dfa *));
147     static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *));
148     static VOID freedfa _ANSI_ARGS_((struct dfa *));
149     static unsigned hash _ANSI_ARGS_((unsigned *, int));
150     static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *));
151     static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *));
152     static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor));
153     static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
154     static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
155     /* automatically gathered by fwd; do not hand-edit */
156     /* =====^!^===== end forwards =====^!^===== */
157    
158    
159    
160     /*
161     - exec - match regular expression
162     ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *,
163     ^ size_t, regmatch_t [], int);
164     */
165     int
166     exec(re, string, len, details, nmatch, pmatch, flags)
167     regex_t *re;
168     CONST chr *string;
169     size_t len;
170     rm_detail_t *details;
171     size_t nmatch;
172     regmatch_t pmatch[];
173     int flags;
174     {
175     struct vars var;
176     register struct vars *v = &var;
177     int st;
178     size_t n;
179     int backref;
180     # define LOCALMAT 20
181     regmatch_t mat[LOCALMAT];
182     # define LOCALMEM 40
183     regoff_t mem[LOCALMEM];
184    
185     /* sanity checks */
186     if (re == NULL || string == NULL || re->re_magic != REMAGIC)
187     return REG_INVARG;
188     if (re->re_csize != sizeof(chr))
189     return REG_MIXED;
190    
191     /* setup */
192     v->re = re;
193     v->g = (struct guts *)re->re_guts;
194     if ((v->g->cflags&REG_EXPECT) && details == NULL)
195     return REG_INVARG;
196     if (v->g->info&REG_UIMPOSSIBLE)
197     return REG_NOMATCH;
198     backref = (v->g->info&REG_UBACKREF) ? 1 : 0;
199     v->eflags = flags;
200     if (v->g->cflags&REG_NOSUB)
201     nmatch = 0; /* override client */
202     v->nmatch = nmatch;
203     if (backref) {
204     /* need work area */
205     if (v->g->nsub + 1 <= LOCALMAT)
206     v->pmatch = mat;
207     else
208     v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) *
209     sizeof(regmatch_t));
210     if (v->pmatch == NULL)
211     return REG_ESPACE;
212     v->nmatch = v->g->nsub + 1;
213     } else
214     v->pmatch = pmatch;
215     v->details = details;
216     v->start = (chr *)string;
217     v->stop = (chr *)string + len;
218     v->err = 0;
219     if (backref) {
220     /* need retry memory */
221     assert(v->g->ntree >= 0);
222     n = (size_t)v->g->ntree;
223     if (n <= LOCALMEM)
224     v->mem = mem;
225     else
226     v->mem = (regoff_t *)MALLOC(n*sizeof(regoff_t));
227     if (v->mem == NULL) {
228     if (v->pmatch != pmatch && v->pmatch != mat)
229     FREE(v->pmatch);
230     return REG_ESPACE;
231     }
232     } else
233     v->mem = NULL;
234    
235     /* do it */
236     assert(v->g->tree != NULL);
237     if (backref)
238     st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
239     else
240     st = find(v, &v->g->tree->cnfa, &v->g->cmap);
241    
242     /* copy (portion of) match vector over if necessary */
243     if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
244     zapsubs(pmatch, nmatch);
245     n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
246     memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
247     }
248    
249     /* clean up */
250     if (v->pmatch != pmatch && v->pmatch != mat)
251     FREE(v->pmatch);
252     if (v->mem != NULL && v->mem != mem)
253     FREE(v->mem);
254     return st;
255     }
256    
257     /*
258     - find - find a match for the main NFA (no-complications case)
259     ^ static int find(struct vars *, struct cnfa *, struct colormap *);
260     */
261     static int
262     find(v, cnfa, cm)
263     struct vars *v;
264     struct cnfa *cnfa;
265     struct colormap *cm;
266     {
267     struct dfa *s;
268     struct dfa *d;
269     chr *begin;
270     chr *end = NULL;
271     chr *cold;
272     chr *open; /* open and close of range of possible starts */
273     chr *close;
274     int hitend;
275     int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0;
276    
277     /* first, a shot with the search RE */
278     s = newdfa(v, &v->g->search, cm, &v->dfa1);
279     assert(!(ISERR() && s != NULL));
280     NOERR();
281     MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
282     cold = NULL;
283     close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)NULL);
284     freedfa(s);
285     NOERR();
286     if (v->g->cflags&REG_EXPECT) {
287     assert(v->details != NULL);
288     if (cold != NULL)
289     v->details->rm_extend.rm_so = OFF(cold);
290     else
291     v->details->rm_extend.rm_so = OFF(v->stop);
292     v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
293     }
294     if (close == NULL) /* not found */
295     return REG_NOMATCH;
296     if (v->nmatch == 0) /* found, don't need exact location */
297     return REG_OKAY;
298    
299     /* find starting point and match */
300     assert(cold != NULL);
301     open = cold;
302     cold = NULL;
303     MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
304     d = newdfa(v, cnfa, cm, &v->dfa1);
305     assert(!(ISERR() && d != NULL));
306     NOERR();
307     for (begin = open; begin <= close; begin++) {
308     MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
309     if (shorter)
310     end = shortest(v, d, begin, begin, v->stop,
311     (chr **)NULL, &hitend);
312     else
313     end = longest(v, d, begin, v->stop, &hitend);
314     NOERR();
315     if (hitend && cold == NULL)
316     cold = begin;
317     if (end != NULL)
318     break; /* NOTE BREAK OUT */
319     }
320     assert(end != NULL); /* search RE succeeded so loop should */
321     freedfa(d);
322    
323     /* and pin down details */
324     assert(v->nmatch > 0);
325     v->pmatch[0].rm_so = OFF(begin);
326     v->pmatch[0].rm_eo = OFF(end);
327     if (v->g->cflags&REG_EXPECT) {
328     if (cold != NULL)
329     v->details->rm_extend.rm_so = OFF(cold);
330     else
331     v->details->rm_extend.rm_so = OFF(v->stop);
332     v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
333     }
334     if (v->nmatch == 1) /* no need for submatches */
335     return REG_OKAY;
336    
337     /* submatches */
338     zapsubs(v->pmatch, v->nmatch);
339     return dissect(v, v->g->tree, begin, end);
340     }
341    
342     /*
343     - cfind - find a match for the main NFA (with complications)
344     ^ static int cfind(struct vars *, struct cnfa *, struct colormap *);
345     */
346     static int
347     cfind(v, cnfa, cm)
348     struct vars *v;
349     struct cnfa *cnfa;
350     struct colormap *cm;
351     {
352     struct dfa *s;
353     struct dfa *d;
354     chr *cold;
355     int ret;
356    
357     s = newdfa(v, &v->g->search, cm, &v->dfa1);
358     NOERR();
359     d = newdfa(v, cnfa, cm, &v->dfa2);
360     if (ISERR()) {
361     assert(d == NULL);
362     freedfa(s);
363     return v->err;
364     }
365    
366     ret = cfindloop(v, cnfa, cm, d, s, &cold);
367    
368     freedfa(d);
369     freedfa(s);
370     NOERR();
371     if (v->g->cflags&REG_EXPECT) {
372     assert(v->details != NULL);
373     if (cold != NULL)
374     v->details->rm_extend.rm_so = OFF(cold);
375     else
376     v->details->rm_extend.rm_so = OFF(v->stop);
377     v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
378     }
379     return ret;
380     }
381    
382     /*
383     - cfindloop - the heart of cfind
384     ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *,
385     ^ struct dfa *, struct dfa *, chr **);
386     */
387     static int
388     cfindloop(v, cnfa, cm, d, s, coldp)
389     struct vars *v;
390     struct cnfa *cnfa;
391     struct colormap *cm;
392     struct dfa *d;
393     struct dfa *s;
394     chr **coldp; /* where to put coldstart pointer */
395     {
396     chr *begin;
397     chr *end;
398     chr *cold;
399     chr *open; /* open and close of range of possible starts */
400     chr *close;
401     chr *estart;
402     chr *estop;
403     int er;
404     int shorter = v->g->tree->flags&SHORTER;
405     int hitend;
406    
407     assert(d != NULL && s != NULL);
408     cold = NULL;
409     close = v->start;
410     do {
411     MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
412     close = shortest(v, s, close, close, v->stop, &cold, (int *)NULL);
413     if (close == NULL)
414     break; /* NOTE BREAK */
415     assert(cold != NULL);
416     open = cold;
417     cold = NULL;
418     MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
419     for (begin = open; begin <= close; begin++) {
420     MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
421     estart = begin;
422     estop = v->stop;
423     for (;;) {
424     if (shorter)
425     end = shortest(v, d, begin, estart,
426     estop, (chr **)NULL, &hitend);
427     else
428     end = longest(v, d, begin, estop,
429     &hitend);
430     if (hitend && cold == NULL)
431     cold = begin;
432     if (end == NULL)
433     break; /* NOTE BREAK OUT */
434     MDEBUG(("tentative end %ld\n", LOFF(end)));
435     zapsubs(v->pmatch, v->nmatch);
436     zapmem(v, v->g->tree);
437     er = cdissect(v, v->g->tree, begin, end);
438     if (er == REG_OKAY) {
439     if (v->nmatch > 0) {
440     v->pmatch[0].rm_so = OFF(begin);
441     v->pmatch[0].rm_eo = OFF(end);
442     }
443     *coldp = cold;
444     return REG_OKAY;
445     }
446     if (er != REG_NOMATCH) {
447     ERR(er);
448     return er;
449     }
450     if ((shorter) ? end == estop : end == begin) {
451     /* no point in trying again */
452     *coldp = cold;
453     return REG_NOMATCH;
454     }
455     /* go around and try again */
456     if (shorter)
457     estart = end + 1;
458     else
459     estop = end - 1;
460     }
461     }
462     } while (close < v->stop);
463    
464     *coldp = cold;
465     return REG_NOMATCH;
466     }
467    
468     /*
469     - zapsubs - initialize the subexpression matches to "no match"
470     ^ static VOID zapsubs(regmatch_t *, size_t);
471     */
472     static VOID
473     zapsubs(p, n)
474     regmatch_t *p;
475     size_t n;
476     {
477     size_t i;
478    
479     for (i = n-1; i > 0; i--) {
480     p[i].rm_so = -1;
481     p[i].rm_eo = -1;
482     }
483     }
484    
485     /*
486     - zapmem - initialize the retry memory of a subtree to zeros
487     ^ static VOID zapmem(struct vars *, struct subre *);
488     */
489     static VOID
490     zapmem(v, t)
491     struct vars *v;
492     struct subre *t;
493     {
494     if (t == NULL)
495     return;
496    
497     assert(v->mem != NULL);
498     v->mem[t->retry] = 0;
499     if (t->op == '(') {
500     assert(t->subno > 0);
501     v->pmatch[t->subno].rm_so = -1;
502     v->pmatch[t->subno].rm_eo = -1;
503     }
504    
505     if (t->left != NULL)
506     zapmem(v, t->left);
507     if (t->right != NULL)
508     zapmem(v, t->right);
509     }
510    
511     /*
512     - subset - set any subexpression relevant to a successful subre
513     ^ static VOID subset(struct vars *, struct subre *, chr *, chr *);
514     */
515     static VOID
516     subset(v, sub, begin, end)
517     struct vars *v;
518     struct subre *sub;
519     chr *begin;
520     chr *end;
521     {
522     int n = sub->subno;
523    
524     assert(n > 0);
525     if ((size_t)n >= v->nmatch)
526     return;
527    
528     MDEBUG(("setting %d\n", n));
529     v->pmatch[n].rm_so = OFF(begin);
530     v->pmatch[n].rm_eo = OFF(end);
531     }
532    
533     /*
534     - dissect - determine subexpression matches (uncomplicated case)
535     ^ static int dissect(struct vars *, struct subre *, chr *, chr *);
536     */
537     static int /* regexec return code */
538     dissect(v, t, begin, end)
539     struct vars *v;
540     struct subre *t;
541     chr *begin; /* beginning of relevant substring */
542     chr *end; /* end of same */
543     {
544     assert(t != NULL);
545     MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));
546    
547     switch (t->op) {
548     case '=': /* terminal node */
549     assert(t->left == NULL && t->right == NULL);
550     return REG_OKAY; /* no action, parent did the work */
551     break;
552     case '|': /* alternation */
553     assert(t->left != NULL);
554     return altdissect(v, t, begin, end);
555     break;
556     case 'b': /* back ref -- shouldn't be calling us! */
557     return REG_ASSERT;
558     break;
559     case '.': /* concatenation */
560     assert(t->left != NULL && t->right != NULL);
561     return condissect(v, t, begin, end);
562     break;
563     case '(': /* capturing */
564     assert(t->left != NULL && t->right == NULL);
565     assert(t->subno > 0);
566     subset(v, t, begin, end);
567     return dissect(v, t->left, begin, end);
568     break;
569     default:
570     return REG_ASSERT;
571     break;
572     }
573     }
574    
575     /*
576     - condissect - determine concatenation subexpression matches (uncomplicated)
577     ^ static int condissect(struct vars *, struct subre *, chr *, chr *);
578     */
579     static int /* regexec return code */
580     condissect(v, t, begin, end)
581     struct vars *v;
582     struct subre *t;
583     chr *begin; /* beginning of relevant substring */
584     chr *end; /* end of same */
585     {
586     struct dfa *d;
587     struct dfa *d2;
588     chr *mid;
589     int i;
590     int shorter = (t->left->flags&SHORTER) ? 1 : 0;
591     chr *stop = (shorter) ? end : begin;
592    
593     assert(t->op == '.');
594     assert(t->left != NULL && t->left->cnfa.nstates > 0);
595     assert(t->right != NULL && t->right->cnfa.nstates > 0);
596    
597     d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
598     NOERR();
599     d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);
600     if (ISERR()) {
601     assert(d2 == NULL);
602     freedfa(d);
603     return v->err;
604     }
605    
606     /* pick a tentative midpoint */
607     if (shorter)
608     mid = shortest(v, d, begin, begin, end, (chr **)NULL,
609     (int *)NULL);
610     else
611     mid = longest(v, d, begin, end, (int *)NULL);
612     if (mid == NULL) {
613     freedfa(d);
614     freedfa(d2);
615     return REG_ASSERT;
616     }
617     MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
618    
619     /* iterate until satisfaction or failure */
620     while (longest(v, d2, mid, end, (int *)NULL) != end) {
621     /* that midpoint didn't work, find a new one */
622     if (mid == stop) {
623     /* all possibilities exhausted! */
624     MDEBUG(("no midpoint!\n"));
625     freedfa(d);
626     freedfa(d2);
627     return REG_ASSERT;
628     }
629     if (shorter)
630     mid = shortest(v, d, begin, mid+1, end, (chr **)NULL,
631     (int *)NULL);
632     else
633     mid = longest(v, d, begin, mid-1, (int *)NULL);
634     if (mid == NULL) {
635     /* failed to find a new one! */
636     MDEBUG(("failed midpoint!\n"));
637     freedfa(d);
638     freedfa(d2);
639     return REG_ASSERT;
640     }
641     MDEBUG(("new midpoint %ld\n", LOFF(mid)));
642     }
643    
644     /* satisfaction */
645     MDEBUG(("successful\n"));
646     freedfa(d);
647     freedfa(d2);
648     i = dissect(v, t->left, begin, mid);
649     if (i != REG_OKAY)
650     return i;
651     return dissect(v, t->right, mid, end);
652     }
653    
654     /*
655     - altdissect - determine alternative subexpression matches (uncomplicated)
656     ^ static int altdissect(struct vars *, struct subre *, chr *, chr *);
657     */
658     static int /* regexec return code */
659     altdissect(v, t, begin, end)
660     struct vars *v;
661     struct subre *t;
662     chr *begin; /* beginning of relevant substring */
663     chr *end; /* end of same */
664     {
665     struct dfa *d;
666     int i;
667    
668     assert(t != NULL);
669     assert(t->op == '|');
670    
671     for (i = 0; t != NULL; t = t->right, i++) {
672     MDEBUG(("trying %dth\n", i));
673     assert(t->left != NULL && t->left->cnfa.nstates > 0);
674     d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
675     if (ISERR())
676     return v->err;
677     if (longest(v, d, begin, end, (int *)NULL) == end) {
678     MDEBUG(("success\n"));
679     freedfa(d);
680     return dissect(v, t->left, begin, end);
681     }
682     freedfa(d);
683     }
684     return REG_ASSERT; /* none of them matched?!? */
685     }
686    
687     /*
688     - cdissect - determine subexpression matches (with complications)
689     * The retry memory stores the offset of the trial midpoint from begin,
690     * plus 1 so that 0 uniquely means "clean slate".
691     ^ static int cdissect(struct vars *, struct subre *, chr *, chr *);
692     */
693     static int /* regexec return code */
694     cdissect(v, t, begin, end)
695     struct vars *v;
696     struct subre *t;
697     chr *begin; /* beginning of relevant substring */
698     chr *end; /* end of same */
699     {
700     int er;
701    
702     assert(t != NULL);
703     MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
704    
705     switch (t->op) {
706     case '=': /* terminal node */
707     assert(t->left == NULL && t->right == NULL);
708     return REG_OKAY; /* no action, parent did the work */
709     break;
710     case '|': /* alternation */
711     assert(t->left != NULL);
712     return caltdissect(v, t, begin, end);
713     break;
714     case 'b': /* back ref -- shouldn't be calling us! */
715     assert(t->left == NULL && t->right == NULL);
716     return cbrdissect(v, t, begin, end);
717     break;
718     case '.': /* concatenation */
719     assert(t->left != NULL && t->right != NULL);
720     return ccondissect(v, t, begin, end);
721     break;
722     case '(': /* capturing */
723     assert(t->left != NULL && t->right == NULL);
724     assert(t->subno > 0);
725     er = cdissect(v, t->left, begin, end);
726     if (er == REG_OKAY)
727     subset(v, t, begin, end);
728     return er;
729     break;
730     default:
731     return REG_ASSERT;
732     break;
733     }
734     }
735    
736     /*
737     - ccondissect - concatenation subexpression matches (with complications)
738     * The retry memory stores the offset of the trial midpoint from begin,
739     * plus 1 so that 0 uniquely means "clean slate".
740     ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *);
741     */
742     static int /* regexec return code */
743     ccondissect(v, t, begin, end)
744     struct vars *v;
745     struct subre *t;
746     chr *begin; /* beginning of relevant substring */
747     chr *end; /* end of same */
748     {
749     struct dfa *d;
750     struct dfa *d2;
751     chr *mid;
752     int er;
753    
754     assert(t->op == '.');
755     assert(t->left != NULL && t->left->cnfa.nstates > 0);
756     assert(t->right != NULL && t->right->cnfa.nstates > 0);
757    
758     if (t->left->flags&SHORTER) /* reverse scan */
759     return crevdissect(v, t, begin, end);
760    
761     d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
762     if (ISERR())
763     return v->err;
764     d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
765     if (ISERR()) {
766     freedfa(d);
767     return v->err;
768     }
769     MDEBUG(("cconcat %d\n", t->retry));
770    
771     /* pick a tentative midpoint */
772     if (v->mem[t->retry] == 0) {
773     mid = longest(v, d, begin, end, (int *)NULL);
774     if (mid == NULL) {
775     freedfa(d);
776     freedfa(d2);
777     return REG_NOMATCH;
778     }
779     MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
780     v->mem[t->retry] = (mid - begin) + 1;
781     } else {
782     mid = begin + (v->mem[t->retry] - 1);
783     MDEBUG(("working midpoint %ld\n", LOFF(mid)));
784     }
785    
786     /* iterate until satisfaction or failure */
787     for (;;) {
788     /* try this midpoint on for size */
789     er = cdissect(v, t->left, begin, mid);
790     if (er == REG_OKAY &&
791     longest(v, d2, mid, end, (int *)NULL) == end &&
792     (er = cdissect(v, t->right, mid, end)) ==
793     REG_OKAY)
794     break; /* NOTE BREAK OUT */
795     if (er != REG_OKAY && er != REG_NOMATCH) {
796     freedfa(d);
797     freedfa(d2);
798     return er;
799     }
800    
801     /* that midpoint didn't work, find a new one */
802     if (mid == begin) {
803     /* all possibilities exhausted */
804     MDEBUG(("%d no midpoint\n", t->retry));
805     freedfa(d);
806     freedfa(d2);
807     return REG_NOMATCH;
808     }
809     mid = longest(v, d, begin, mid-1, (int *)NULL);
810     if (mid == NULL) {
811     /* failed to find a new one */
812     MDEBUG(("%d failed midpoint\n", t->retry));
813     freedfa(d);
814     freedfa(d2);
815     return REG_NOMATCH;
816     }
817     MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
818     v->mem[t->retry] = (mid - begin) + 1;
819     zapmem(v, t->left);
820     zapmem(v, t->right);
821     }
822    
823     /* satisfaction */
824     MDEBUG(("successful\n"));
825     freedfa(d);
826     freedfa(d2);
827     return REG_OKAY;
828     }
829    
830     /*
831     - crevdissect - determine backref shortest-first subexpression matches
832     * The retry memory stores the offset of the trial midpoint from begin,
833     * plus 1 so that 0 uniquely means "clean slate".
834     ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *);
835     */
836     static int /* regexec return code */
837     crevdissect(v, t, begin, end)
838     struct vars *v;
839     struct subre *t;
840     chr *begin; /* beginning of relevant substring */
841     chr *end; /* end of same */
842     {
843     struct dfa *d;
844     struct dfa *d2;
845     chr *mid;
846     int er;
847    
848     assert(t->op == '.');
849     assert(t->left != NULL && t->left->cnfa.nstates > 0);
850     assert(t->right != NULL && t->right->cnfa.nstates > 0);
851     assert(t->left->flags&SHORTER);
852    
853     /* concatenation -- need to split the substring between parts */
854     d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
855     if (ISERR())
856     return v->err;
857     d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
858     if (ISERR()) {
859     freedfa(d);
860     return v->err;
861     }
862     MDEBUG(("crev %d\n", t->retry));
863    
864     /* pick a tentative midpoint */
865     if (v->mem[t->retry] == 0) {
866     mid = shortest(v, d, begin, begin, end, (chr **)NULL, (int *)NULL);
867     if (mid == NULL) {
868     freedfa(d);
869     freedfa(d2);
870     return REG_NOMATCH;
871     }
872     MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
873     v->mem[t->retry] = (mid - begin) + 1;
874     } else {
875     mid = begin + (v->mem[t->retry] - 1);
876     MDEBUG(("working midpoint %ld\n", LOFF(mid)));
877     }
878    
879     /* iterate until satisfaction or failure */
880     for (;;) {
881     /* try this midpoint on for size */
882     er = cdissect(v, t->left, begin, mid);
883     if (er == REG_OKAY &&
884     longest(v, d2, mid, end, (int *)NULL) == end &&
885     (er = cdissect(v, t->right, mid, end)) ==
886     REG_OKAY)
887     break; /* NOTE BREAK OUT */
888     if (er != REG_OKAY && er != REG_NOMATCH) {
889     freedfa(d);
890     freedfa(d2);
891     return er;
892     }
893    
894     /* that midpoint didn't work, find a new one */
895     if (mid == end) {
896     /* all possibilities exhausted */
897     MDEBUG(("%d no midpoint\n", t->retry));
898     freedfa(d);
899     freedfa(d2);
900     return REG_NOMATCH;
901     }
902     mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, (int *)NULL);
903     if (mid == NULL) {
904     /* failed to find a new one */
905     MDEBUG(("%d failed midpoint\n", t->retry));
906     freedfa(d);
907     freedfa(d2);
908     return REG_NOMATCH;
909     }
910     MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
911     v->mem[t->retry] = (mid - begin) + 1;
912     zapmem(v, t->left);
913     zapmem(v, t->right);
914     }
915    
916     /* satisfaction */
917     MDEBUG(("successful\n"));
918     freedfa(d);
919     freedfa(d2);
920     return REG_OKAY;
921     }
922    
923     /*
924     - cbrdissect - determine backref subexpression matches
925     ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
926     */
927     static int /* regexec return code */
928     cbrdissect(v, t, begin, end)
929     struct vars *v;
930     struct subre *t;
931     chr *begin; /* beginning of relevant substring */
932     chr *end; /* end of same */
933     {
934     int i;
935     int n = t->subno;
936     size_t len;
937     chr *paren;
938     chr *p;
939     chr *stop;
940     int min = t->min;
941     int max = t->max;
942    
943     assert(t != NULL);
944     assert(t->op == 'b');
945     assert(n >= 0);
946     assert((size_t)n < v->nmatch);
947    
948     MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
949    
950     if (v->pmatch[n].rm_so == -1)
951     return REG_NOMATCH;
952     paren = v->start + v->pmatch[n].rm_so;
953     len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
954    
955     /* no room to maneuver -- retries are pointless */
956     if (v->mem[t->retry])
957     return REG_NOMATCH;
958     v->mem[t->retry] = 1;
959    
960     /* special-case zero-length string */
961     if (len == 0) {
962     if (begin == end)
963     return REG_OKAY;
964     return REG_NOMATCH;
965     }
966    
967     /* and too-short string */
968     assert(end >= begin);
969     if ((size_t)(end - begin) < len)
970     return REG_NOMATCH;
971     stop = end - len;
972    
973     /* count occurrences */
974     i = 0;
975     for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) {
976     if ((*v->g->compare)(paren, p, len) != 0)
977     break;
978     i++;
979     }
980     MDEBUG(("cbackref found %d\n", i));
981    
982     /* and sort it out */
983     if (p != end) /* didn't consume all of it */
984     return REG_NOMATCH;
985     if (min <= i && (i <= max || max == INFINITY))
986     return REG_OKAY;
987     return REG_NOMATCH; /* out of range */
988     }
989    
990     /*
991     - caltdissect - determine alternative subexpression matches (w. complications)
992     ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *);
993     */
994     static int /* regexec return code */
995     caltdissect(v, t, begin, end)
996     struct vars *v;
997     struct subre *t;
998     chr *begin; /* beginning of relevant substring */
999     chr *end; /* end of same */
1000     {
1001     struct dfa *d;
1002     int er;
1003     # define UNTRIED 0 /* not yet tried at all */
1004     # define TRYING 1 /* top matched, trying submatches */
1005     # define TRIED 2 /* top didn't match or submatches exhausted */
1006    
1007     if (t == NULL)
1008     return REG_NOMATCH;
1009     assert(t->op == '|');
1010     if (v->mem[t->retry] == TRIED)
1011     return caltdissect(v, t->right, begin, end);
1012    
1013     MDEBUG(("calt n%d\n", t->retry));
1014     assert(t->left != NULL);
1015    
1016     if (v->mem[t->retry] == UNTRIED) {
1017     d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
1018     if (ISERR())
1019     return v->err;
1020     if (longest(v, d, begin, end, (int *)NULL) != end) {
1021     freedfa(d);
1022     v->mem[t->retry] = TRIED;
1023     return caltdissect(v, t->right, begin, end);
1024     }
1025     freedfa(d);
1026     MDEBUG(("calt matched\n"));
1027     v->mem[t->retry] = TRYING;
1028     }
1029    
1030     er = cdissect(v, t->left, begin, end);
1031     if (er != REG_NOMATCH)
1032     return er;
1033    
1034     v->mem[t->retry] = TRIED;
1035     return caltdissect(v, t->right, begin, end);
1036     }
1037    
1038    
1039    
1040     #include "rege_dfa.c"
1041    
1042     /* $History: regexec.c $
1043     *
1044     * ***************** Version 1 *****************
1045     * User: Dtashley Date: 1/02/01 Time: 12:11a
1046     * Created in $/IjuScripter, IjuConsole/Source/Tcl Base
1047     * Initial check-in.
1048     */
1049    
1050     /* End of REGEXEC.C */

Properties

Name Value
svn:keywords Header

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25