/[dtapublic]/projs/dtats/trunk/shared_source/c_tcl_base_7_5_w_mods/regexec.c
ViewVC logotype

Annotation of /projs/dtats/trunk/shared_source/c_tcl_base_7_5_w_mods/regexec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 98 - (hide annotations) (download)
Sun Dec 18 00:57:31 2016 UTC (7 years, 5 months ago) by dashley
File MIME type: text/plain
File size: 28398 byte(s)
Reorganization.
1 dashley 71 /* $Header$ */
2     /*
3     * re_*exec and friends - match REs
4     *
5     * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
6     *
7     * Development of this software was funded, in part, by Cray Research Inc.,
8     * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9     * Corporation, none of whom are responsible for the results. The author
10     * thanks all of them.
11     *
12     * Redistribution and use in source and binary forms -- with or without
13     * modification -- are permitted for any purpose, provided that
14     * redistributions in source form retain this entire copyright notice and
15     * indicate the origin and nature of any modifications.
16     *
17     * I'd appreciate being given credit for this package in the documentation
18     * of software which uses it, but that is not a requirement.
19     *
20     * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22     * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23     * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24     * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25     * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26     * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27     * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28     * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29     * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30     *
31     */
32    
33     #include "regguts.h"
34    
35    
36    
37     /* lazy-DFA representation */
38     struct arcp { /* "pointer" to an outarc */
39     struct sset *ss;
40     color co;
41     };
42    
43     struct sset { /* state set */
44     unsigned *states; /* pointer to bitvector */
45     unsigned hash; /* hash of bitvector */
46     # define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
47     # define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
48     memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
49     int flags;
50     # define STARTER 01 /* the initial state set */
51     # define POSTSTATE 02 /* includes the goal state */
52     # define LOCKED 04 /* locked in cache */
53     # define NOPROGRESS 010 /* zero-progress state set */
54     struct arcp ins; /* chain of inarcs pointing here */
55     chr *lastseen; /* last entered on arrival here */
56     struct sset **outs; /* outarc vector indexed by color */
57     struct arcp *inchain; /* chain-pointer vector for outarcs */
58     };
59    
60     struct dfa {
61     int nssets; /* size of cache */
62     int nssused; /* how many entries occupied yet */
63     int nstates; /* number of states */
64     int ncolors; /* length of outarc and inchain vectors */
65     int wordsper; /* length of state-set bitvectors */
66     struct sset *ssets; /* state-set cache */
67     unsigned *statesarea; /* bitvector storage */
68     unsigned *work; /* pointer to work area within statesarea */
69     struct sset **outsarea; /* outarc-vector storage */
70     struct arcp *incarea; /* inchain storage */
71     struct cnfa *cnfa;
72     struct colormap *cm;
73     chr *lastpost; /* location of last cache-flushed success */
74     chr *lastnopr; /* location of last cache-flushed NOPROGRESS */
75     struct sset *search; /* replacement-search-pointer memory */
76     int cptsmalloced; /* were the areas individually malloced? */
77     char *mallocarea; /* self, or master malloced area, or NULL */
78     };
79    
80     #define WORK 1 /* number of work bitvectors needed */
81    
82     /* setup for non-malloc allocation for small cases */
83     #define FEWSTATES 20 /* must be less than UBITS */
84     #define FEWCOLORS 15
85     struct smalldfa {
86     struct dfa dfa;
87     struct sset ssets[FEWSTATES*2];
88     unsigned statesarea[FEWSTATES*2 + WORK];
89     struct sset *outsarea[FEWSTATES*2 * FEWCOLORS];
90     struct arcp incarea[FEWSTATES*2 * FEWCOLORS];
91     };
92     #define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */
93    
94    
95    
96     /* internal variables, bundled for easy passing around */
97     struct vars {
98     regex_t *re;
99     struct guts *g;
100     int eflags; /* copies of arguments */
101     size_t nmatch;
102     regmatch_t *pmatch;
103     rm_detail_t *details;
104     chr *start; /* start of string */
105     chr *stop; /* just past end of string */
106     int err; /* error code if any (0 none) */
107     regoff_t *mem; /* memory vector for backtracking */
108     struct smalldfa dfa1;
109     struct smalldfa dfa2;
110     };
111     #define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
112     #define ISERR() VISERR(v)
113     #define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e)))
114     #define ERR(e) VERR(v, e) /* record an error */
115     #define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */
116     #define OFF(p) ((p) - v->start)
117     #define LOFF(p) ((long)OFF(p))
118    
119    
120    
121     /*
122     * forward declarations
123     */
124     /* =====^!^===== begin forwards =====^!^===== */
125     /* automatically gathered by fwd; do not hand-edit */
126     /* === regexec.c === */
127     int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
128     static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
129     static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
130     static int cfindloop _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **));
131     static VOID zapsubs _ANSI_ARGS_((regmatch_t *, size_t));
132     static VOID zapmem _ANSI_ARGS_((struct vars *, struct subre *));
133     static VOID subset _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
134     static int dissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
135     static int condissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
136     static int altdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
137     static int cdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
138     static int ccondissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
139     static int crevdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
140     static int cbrdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
141     static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
142     /* === rege_dfa.c === */
143     static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, int *));
144     static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *));
145     static chr *lastcold _ANSI_ARGS_((struct vars *, struct dfa *));
146     static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *));
147     static VOID freedfa _ANSI_ARGS_((struct dfa *));
148     static unsigned hash _ANSI_ARGS_((unsigned *, int));
149     static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *));
150     static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *));
151     static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor));
152     static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
153     static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
154     /* automatically gathered by fwd; do not hand-edit */
155     /* =====^!^===== end forwards =====^!^===== */
156    
157    
158    
159     /*
160     - exec - match regular expression
161     ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *,
162     ^ size_t, regmatch_t [], int);
163     */
164     int
165     exec(re, string, len, details, nmatch, pmatch, flags)
166     regex_t *re;
167     CONST chr *string;
168     size_t len;
169     rm_detail_t *details;
170     size_t nmatch;
171     regmatch_t pmatch[];
172     int flags;
173     {
174     struct vars var;
175     register struct vars *v = &var;
176     int st;
177     size_t n;
178     int backref;
179     # define LOCALMAT 20
180     regmatch_t mat[LOCALMAT];
181     # define LOCALMEM 40
182     regoff_t mem[LOCALMEM];
183    
184     /* sanity checks */
185     if (re == NULL || string == NULL || re->re_magic != REMAGIC)
186     return REG_INVARG;
187     if (re->re_csize != sizeof(chr))
188     return REG_MIXED;
189    
190     /* setup */
191     v->re = re;
192     v->g = (struct guts *)re->re_guts;
193     if ((v->g->cflags&REG_EXPECT) && details == NULL)
194     return REG_INVARG;
195     if (v->g->info&REG_UIMPOSSIBLE)
196     return REG_NOMATCH;
197     backref = (v->g->info&REG_UBACKREF) ? 1 : 0;
198     v->eflags = flags;
199     if (v->g->cflags&REG_NOSUB)
200     nmatch = 0; /* override client */
201     v->nmatch = nmatch;
202     if (backref) {
203     /* need work area */
204     if (v->g->nsub + 1 <= LOCALMAT)
205     v->pmatch = mat;
206     else
207     v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) *
208     sizeof(regmatch_t));
209     if (v->pmatch == NULL)
210     return REG_ESPACE;
211     v->nmatch = v->g->nsub + 1;
212     } else
213     v->pmatch = pmatch;
214     v->details = details;
215     v->start = (chr *)string;
216     v->stop = (chr *)string + len;
217     v->err = 0;
218     if (backref) {
219     /* need retry memory */
220     assert(v->g->ntree >= 0);
221     n = (size_t)v->g->ntree;
222     if (n <= LOCALMEM)
223     v->mem = mem;
224     else
225     v->mem = (regoff_t *)MALLOC(n*sizeof(regoff_t));
226     if (v->mem == NULL) {
227     if (v->pmatch != pmatch && v->pmatch != mat)
228     FREE(v->pmatch);
229     return REG_ESPACE;
230     }
231     } else
232     v->mem = NULL;
233    
234     /* do it */
235     assert(v->g->tree != NULL);
236     if (backref)
237     st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
238     else
239     st = find(v, &v->g->tree->cnfa, &v->g->cmap);
240    
241     /* copy (portion of) match vector over if necessary */
242     if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
243     zapsubs(pmatch, nmatch);
244     n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
245     memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
246     }
247    
248     /* clean up */
249     if (v->pmatch != pmatch && v->pmatch != mat)
250     FREE(v->pmatch);
251     if (v->mem != NULL && v->mem != mem)
252     FREE(v->mem);
253     return st;
254     }
255    
256     /*
257     - find - find a match for the main NFA (no-complications case)
258     ^ static int find(struct vars *, struct cnfa *, struct colormap *);
259     */
260     static int
261     find(v, cnfa, cm)
262     struct vars *v;
263     struct cnfa *cnfa;
264     struct colormap *cm;
265     {
266     struct dfa *s;
267     struct dfa *d;
268     chr *begin;
269     chr *end = NULL;
270     chr *cold;
271     chr *open; /* open and close of range of possible starts */
272     chr *close;
273     int hitend;
274     int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0;
275    
276     /* first, a shot with the search RE */
277     s = newdfa(v, &v->g->search, cm, &v->dfa1);
278     assert(!(ISERR() && s != NULL));
279     NOERR();
280     MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
281     cold = NULL;
282     close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)NULL);
283     freedfa(s);
284     NOERR();
285     if (v->g->cflags&REG_EXPECT) {
286     assert(v->details != NULL);
287     if (cold != NULL)
288     v->details->rm_extend.rm_so = OFF(cold);
289     else
290     v->details->rm_extend.rm_so = OFF(v->stop);
291     v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
292     }
293     if (close == NULL) /* not found */
294     return REG_NOMATCH;
295     if (v->nmatch == 0) /* found, don't need exact location */
296     return REG_OKAY;
297    
298     /* find starting point and match */
299     assert(cold != NULL);
300     open = cold;
301     cold = NULL;
302     MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
303     d = newdfa(v, cnfa, cm, &v->dfa1);
304     assert(!(ISERR() && d != NULL));
305     NOERR();
306     for (begin = open; begin <= close; begin++) {
307     MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
308     if (shorter)
309     end = shortest(v, d, begin, begin, v->stop,
310     (chr **)NULL, &hitend);
311     else
312     end = longest(v, d, begin, v->stop, &hitend);
313     NOERR();
314     if (hitend && cold == NULL)
315     cold = begin;
316     if (end != NULL)
317     break; /* NOTE BREAK OUT */
318     }
319     assert(end != NULL); /* search RE succeeded so loop should */
320     freedfa(d);
321    
322     /* and pin down details */
323     assert(v->nmatch > 0);
324     v->pmatch[0].rm_so = OFF(begin);
325     v->pmatch[0].rm_eo = OFF(end);
326     if (v->g->cflags&REG_EXPECT) {
327     if (cold != NULL)
328     v->details->rm_extend.rm_so = OFF(cold);
329     else
330     v->details->rm_extend.rm_so = OFF(v->stop);
331     v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
332     }
333     if (v->nmatch == 1) /* no need for submatches */
334     return REG_OKAY;
335    
336     /* submatches */
337     zapsubs(v->pmatch, v->nmatch);
338     return dissect(v, v->g->tree, begin, end);
339     }
340    
341     /*
342     - cfind - find a match for the main NFA (with complications)
343     ^ static int cfind(struct vars *, struct cnfa *, struct colormap *);
344     */
345     static int
346     cfind(v, cnfa, cm)
347     struct vars *v;
348     struct cnfa *cnfa;
349     struct colormap *cm;
350     {
351     struct dfa *s;
352     struct dfa *d;
353     chr *cold;
354     int ret;
355    
356     s = newdfa(v, &v->g->search, cm, &v->dfa1);
357     NOERR();
358     d = newdfa(v, cnfa, cm, &v->dfa2);
359     if (ISERR()) {
360     assert(d == NULL);
361     freedfa(s);
362     return v->err;
363     }
364    
365     ret = cfindloop(v, cnfa, cm, d, s, &cold);
366    
367     freedfa(d);
368     freedfa(s);
369     NOERR();
370     if (v->g->cflags&REG_EXPECT) {
371     assert(v->details != NULL);
372     if (cold != NULL)
373     v->details->rm_extend.rm_so = OFF(cold);
374     else
375     v->details->rm_extend.rm_so = OFF(v->stop);
376     v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
377     }
378     return ret;
379     }
380    
381     /*
382     - cfindloop - the heart of cfind
383     ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *,
384     ^ struct dfa *, struct dfa *, chr **);
385     */
386     static int
387     cfindloop(v, cnfa, cm, d, s, coldp)
388     struct vars *v;
389     struct cnfa *cnfa;
390     struct colormap *cm;
391     struct dfa *d;
392     struct dfa *s;
393     chr **coldp; /* where to put coldstart pointer */
394     {
395     chr *begin;
396     chr *end;
397     chr *cold;
398     chr *open; /* open and close of range of possible starts */
399     chr *close;
400     chr *estart;
401     chr *estop;
402     int er;
403     int shorter = v->g->tree->flags&SHORTER;
404     int hitend;
405    
406     assert(d != NULL && s != NULL);
407     cold = NULL;
408     close = v->start;
409     do {
410     MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
411     close = shortest(v, s, close, close, v->stop, &cold, (int *)NULL);
412     if (close == NULL)
413     break; /* NOTE BREAK */
414     assert(cold != NULL);
415     open = cold;
416     cold = NULL;
417     MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
418     for (begin = open; begin <= close; begin++) {
419     MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
420     estart = begin;
421     estop = v->stop;
422     for (;;) {
423     if (shorter)
424     end = shortest(v, d, begin, estart,
425     estop, (chr **)NULL, &hitend);
426     else
427     end = longest(v, d, begin, estop,
428     &hitend);
429     if (hitend && cold == NULL)
430     cold = begin;
431     if (end == NULL)
432     break; /* NOTE BREAK OUT */
433     MDEBUG(("tentative end %ld\n", LOFF(end)));
434     zapsubs(v->pmatch, v->nmatch);
435     zapmem(v, v->g->tree);
436     er = cdissect(v, v->g->tree, begin, end);
437     if (er == REG_OKAY) {
438     if (v->nmatch > 0) {
439     v->pmatch[0].rm_so = OFF(begin);
440     v->pmatch[0].rm_eo = OFF(end);
441     }
442     *coldp = cold;
443     return REG_OKAY;
444     }
445     if (er != REG_NOMATCH) {
446     ERR(er);
447     return er;
448     }
449     if ((shorter) ? end == estop : end == begin) {
450     /* no point in trying again */
451     *coldp = cold;
452     return REG_NOMATCH;
453     }
454     /* go around and try again */
455     if (shorter)
456     estart = end + 1;
457     else
458     estop = end - 1;
459     }
460     }
461     } while (close < v->stop);
462    
463     *coldp = cold;
464     return REG_NOMATCH;
465     }
466    
467     /*
468     - zapsubs - initialize the subexpression matches to "no match"
469     ^ static VOID zapsubs(regmatch_t *, size_t);
470     */
471     static VOID
472     zapsubs(p, n)
473     regmatch_t *p;
474     size_t n;
475     {
476     size_t i;
477    
478     for (i = n-1; i > 0; i--) {
479     p[i].rm_so = -1;
480     p[i].rm_eo = -1;
481     }
482     }
483    
484     /*
485     - zapmem - initialize the retry memory of a subtree to zeros
486     ^ static VOID zapmem(struct vars *, struct subre *);
487     */
488     static VOID
489     zapmem(v, t)
490     struct vars *v;
491     struct subre *t;
492     {
493     if (t == NULL)
494     return;
495    
496     assert(v->mem != NULL);
497     v->mem[t->retry] = 0;
498     if (t->op == '(') {
499     assert(t->subno > 0);
500     v->pmatch[t->subno].rm_so = -1;
501     v->pmatch[t->subno].rm_eo = -1;
502     }
503    
504     if (t->left != NULL)
505     zapmem(v, t->left);
506     if (t->right != NULL)
507     zapmem(v, t->right);
508     }
509    
510     /*
511     - subset - set any subexpression relevant to a successful subre
512     ^ static VOID subset(struct vars *, struct subre *, chr *, chr *);
513     */
514     static VOID
515     subset(v, sub, begin, end)
516     struct vars *v;
517     struct subre *sub;
518     chr *begin;
519     chr *end;
520     {
521     int n = sub->subno;
522    
523     assert(n > 0);
524     if ((size_t)n >= v->nmatch)
525     return;
526    
527     MDEBUG(("setting %d\n", n));
528     v->pmatch[n].rm_so = OFF(begin);
529     v->pmatch[n].rm_eo = OFF(end);
530     }
531    
532     /*
533     - dissect - determine subexpression matches (uncomplicated case)
534     ^ static int dissect(struct vars *, struct subre *, chr *, chr *);
535     */
536     static int /* regexec return code */
537     dissect(v, t, begin, end)
538     struct vars *v;
539     struct subre *t;
540     chr *begin; /* beginning of relevant substring */
541     chr *end; /* end of same */
542     {
543     assert(t != NULL);
544     MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));
545    
546     switch (t->op) {
547     case '=': /* terminal node */
548     assert(t->left == NULL && t->right == NULL);
549     return REG_OKAY; /* no action, parent did the work */
550     break;
551     case '|': /* alternation */
552     assert(t->left != NULL);
553     return altdissect(v, t, begin, end);
554     break;
555     case 'b': /* back ref -- shouldn't be calling us! */
556     return REG_ASSERT;
557     break;
558     case '.': /* concatenation */
559     assert(t->left != NULL && t->right != NULL);
560     return condissect(v, t, begin, end);
561     break;
562     case '(': /* capturing */
563     assert(t->left != NULL && t->right == NULL);
564     assert(t->subno > 0);
565     subset(v, t, begin, end);
566     return dissect(v, t->left, begin, end);
567     break;
568     default:
569     return REG_ASSERT;
570     break;
571     }
572     }
573    
574     /*
575     - condissect - determine concatenation subexpression matches (uncomplicated)
576     ^ static int condissect(struct vars *, struct subre *, chr *, chr *);
577     */
578     static int /* regexec return code */
579     condissect(v, t, begin, end)
580     struct vars *v;
581     struct subre *t;
582     chr *begin; /* beginning of relevant substring */
583     chr *end; /* end of same */
584     {
585     struct dfa *d;
586     struct dfa *d2;
587     chr *mid;
588     int i;
589     int shorter = (t->left->flags&SHORTER) ? 1 : 0;
590     chr *stop = (shorter) ? end : begin;
591    
592     assert(t->op == '.');
593     assert(t->left != NULL && t->left->cnfa.nstates > 0);
594     assert(t->right != NULL && t->right->cnfa.nstates > 0);
595    
596     d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
597     NOERR();
598     d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);
599     if (ISERR()) {
600     assert(d2 == NULL);
601     freedfa(d);
602     return v->err;
603     }
604    
605     /* pick a tentative midpoint */
606     if (shorter)
607     mid = shortest(v, d, begin, begin, end, (chr **)NULL,
608     (int *)NULL);
609     else
610     mid = longest(v, d, begin, end, (int *)NULL);
611     if (mid == NULL) {
612     freedfa(d);
613     freedfa(d2);
614     return REG_ASSERT;
615     }
616     MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
617    
618     /* iterate until satisfaction or failure */
619     while (longest(v, d2, mid, end, (int *)NULL) != end) {
620     /* that midpoint didn't work, find a new one */
621     if (mid == stop) {
622     /* all possibilities exhausted! */
623     MDEBUG(("no midpoint!\n"));
624     freedfa(d);
625     freedfa(d2);
626     return REG_ASSERT;
627     }
628     if (shorter)
629     mid = shortest(v, d, begin, mid+1, end, (chr **)NULL,
630     (int *)NULL);
631     else
632     mid = longest(v, d, begin, mid-1, (int *)NULL);
633     if (mid == NULL) {
634     /* failed to find a new one! */
635     MDEBUG(("failed midpoint!\n"));
636     freedfa(d);
637     freedfa(d2);
638     return REG_ASSERT;
639     }
640     MDEBUG(("new midpoint %ld\n", LOFF(mid)));
641     }
642    
643     /* satisfaction */
644     MDEBUG(("successful\n"));
645     freedfa(d);
646     freedfa(d2);
647     i = dissect(v, t->left, begin, mid);
648     if (i != REG_OKAY)
649     return i;
650     return dissect(v, t->right, mid, end);
651     }
652    
653     /*
654     - altdissect - determine alternative subexpression matches (uncomplicated)
655     ^ static int altdissect(struct vars *, struct subre *, chr *, chr *);
656     */
657     static int /* regexec return code */
658     altdissect(v, t, begin, end)
659     struct vars *v;
660     struct subre *t;
661     chr *begin; /* beginning of relevant substring */
662     chr *end; /* end of same */
663     {
664     struct dfa *d;
665     int i;
666    
667     assert(t != NULL);
668     assert(t->op == '|');
669    
670     for (i = 0; t != NULL; t = t->right, i++) {
671     MDEBUG(("trying %dth\n", i));
672     assert(t->left != NULL && t->left->cnfa.nstates > 0);
673     d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
674     if (ISERR())
675     return v->err;
676     if (longest(v, d, begin, end, (int *)NULL) == end) {
677     MDEBUG(("success\n"));
678     freedfa(d);
679     return dissect(v, t->left, begin, end);
680     }
681     freedfa(d);
682     }
683     return REG_ASSERT; /* none of them matched?!? */
684     }
685    
686     /*
687     - cdissect - determine subexpression matches (with complications)
688     * The retry memory stores the offset of the trial midpoint from begin,
689     * plus 1 so that 0 uniquely means "clean slate".
690     ^ static int cdissect(struct vars *, struct subre *, chr *, chr *);
691     */
692     static int /* regexec return code */
693     cdissect(v, t, begin, end)
694     struct vars *v;
695     struct subre *t;
696     chr *begin; /* beginning of relevant substring */
697     chr *end; /* end of same */
698     {
699     int er;
700    
701     assert(t != NULL);
702     MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
703    
704     switch (t->op) {
705     case '=': /* terminal node */
706     assert(t->left == NULL && t->right == NULL);
707     return REG_OKAY; /* no action, parent did the work */
708     break;
709     case '|': /* alternation */
710     assert(t->left != NULL);
711     return caltdissect(v, t, begin, end);
712     break;
713     case 'b': /* back ref -- shouldn't be calling us! */
714     assert(t->left == NULL && t->right == NULL);
715     return cbrdissect(v, t, begin, end);
716     break;
717     case '.': /* concatenation */
718     assert(t->left != NULL && t->right != NULL);
719     return ccondissect(v, t, begin, end);
720     break;
721     case '(': /* capturing */
722     assert(t->left != NULL && t->right == NULL);
723     assert(t->subno > 0);
724     er = cdissect(v, t->left, begin, end);
725     if (er == REG_OKAY)
726     subset(v, t, begin, end);
727     return er;
728     break;
729     default:
730     return REG_ASSERT;
731     break;
732     }
733     }
734    
735     /*
736     - ccondissect - concatenation subexpression matches (with complications)
737     * The retry memory stores the offset of the trial midpoint from begin,
738     * plus 1 so that 0 uniquely means "clean slate".
739     ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *);
740     */
741     static int /* regexec return code */
742     ccondissect(v, t, begin, end)
743     struct vars *v;
744     struct subre *t;
745     chr *begin; /* beginning of relevant substring */
746     chr *end; /* end of same */
747     {
748     struct dfa *d;
749     struct dfa *d2;
750     chr *mid;
751     int er;
752    
753     assert(t->op == '.');
754     assert(t->left != NULL && t->left->cnfa.nstates > 0);
755     assert(t->right != NULL && t->right->cnfa.nstates > 0);
756    
757     if (t->left->flags&SHORTER) /* reverse scan */
758     return crevdissect(v, t, begin, end);
759    
760     d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
761     if (ISERR())
762     return v->err;
763     d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
764     if (ISERR()) {
765     freedfa(d);
766     return v->err;
767     }
768     MDEBUG(("cconcat %d\n", t->retry));
769    
770     /* pick a tentative midpoint */
771     if (v->mem[t->retry] == 0) {
772     mid = longest(v, d, begin, end, (int *)NULL);
773     if (mid == NULL) {
774     freedfa(d);
775     freedfa(d2);
776     return REG_NOMATCH;
777     }
778     MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
779     v->mem[t->retry] = (mid - begin) + 1;
780     } else {
781     mid = begin + (v->mem[t->retry] - 1);
782     MDEBUG(("working midpoint %ld\n", LOFF(mid)));
783     }
784    
785     /* iterate until satisfaction or failure */
786     for (;;) {
787     /* try this midpoint on for size */
788     er = cdissect(v, t->left, begin, mid);
789     if (er == REG_OKAY &&
790     longest(v, d2, mid, end, (int *)NULL) == end &&
791     (er = cdissect(v, t->right, mid, end)) ==
792     REG_OKAY)
793     break; /* NOTE BREAK OUT */
794     if (er != REG_OKAY && er != REG_NOMATCH) {
795     freedfa(d);
796     freedfa(d2);
797     return er;
798     }
799    
800     /* that midpoint didn't work, find a new one */
801     if (mid == begin) {
802     /* all possibilities exhausted */
803     MDEBUG(("%d no midpoint\n", t->retry));
804     freedfa(d);
805     freedfa(d2);
806     return REG_NOMATCH;
807     }
808     mid = longest(v, d, begin, mid-1, (int *)NULL);
809     if (mid == NULL) {
810     /* failed to find a new one */
811     MDEBUG(("%d failed midpoint\n", t->retry));
812     freedfa(d);
813     freedfa(d2);
814     return REG_NOMATCH;
815     }
816     MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
817     v->mem[t->retry] = (mid - begin) + 1;
818     zapmem(v, t->left);
819     zapmem(v, t->right);
820     }
821    
822     /* satisfaction */
823     MDEBUG(("successful\n"));
824     freedfa(d);
825     freedfa(d2);
826     return REG_OKAY;
827     }
828    
829     /*
830     - crevdissect - determine backref shortest-first subexpression matches
831     * The retry memory stores the offset of the trial midpoint from begin,
832     * plus 1 so that 0 uniquely means "clean slate".
833     ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *);
834     */
835     static int /* regexec return code */
836     crevdissect(v, t, begin, end)
837     struct vars *v;
838     struct subre *t;
839     chr *begin; /* beginning of relevant substring */
840     chr *end; /* end of same */
841     {
842     struct dfa *d;
843     struct dfa *d2;
844     chr *mid;
845     int er;
846    
847     assert(t->op == '.');
848     assert(t->left != NULL && t->left->cnfa.nstates > 0);
849     assert(t->right != NULL && t->right->cnfa.nstates > 0);
850     assert(t->left->flags&SHORTER);
851    
852     /* concatenation -- need to split the substring between parts */
853     d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
854     if (ISERR())
855     return v->err;
856     d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
857     if (ISERR()) {
858     freedfa(d);
859     return v->err;
860     }
861     MDEBUG(("crev %d\n", t->retry));
862    
863     /* pick a tentative midpoint */
864     if (v->mem[t->retry] == 0) {
865     mid = shortest(v, d, begin, begin, end, (chr **)NULL, (int *)NULL);
866     if (mid == NULL) {
867     freedfa(d);
868     freedfa(d2);
869     return REG_NOMATCH;
870     }
871     MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
872     v->mem[t->retry] = (mid - begin) + 1;
873     } else {
874     mid = begin + (v->mem[t->retry] - 1);
875     MDEBUG(("working midpoint %ld\n", LOFF(mid)));
876     }
877    
878     /* iterate until satisfaction or failure */
879     for (;;) {
880     /* try this midpoint on for size */
881     er = cdissect(v, t->left, begin, mid);
882     if (er == REG_OKAY &&
883     longest(v, d2, mid, end, (int *)NULL) == end &&
884     (er = cdissect(v, t->right, mid, end)) ==
885     REG_OKAY)
886     break; /* NOTE BREAK OUT */
887     if (er != REG_OKAY && er != REG_NOMATCH) {
888     freedfa(d);
889     freedfa(d2);
890     return er;
891     }
892    
893     /* that midpoint didn't work, find a new one */
894     if (mid == end) {
895     /* all possibilities exhausted */
896     MDEBUG(("%d no midpoint\n", t->retry));
897     freedfa(d);
898     freedfa(d2);
899     return REG_NOMATCH;
900     }
901     mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, (int *)NULL);
902     if (mid == NULL) {
903     /* failed to find a new one */
904     MDEBUG(("%d failed midpoint\n", t->retry));
905     freedfa(d);
906     freedfa(d2);
907     return REG_NOMATCH;
908     }
909     MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
910     v->mem[t->retry] = (mid - begin) + 1;
911     zapmem(v, t->left);
912     zapmem(v, t->right);
913     }
914    
915     /* satisfaction */
916     MDEBUG(("successful\n"));
917     freedfa(d);
918     freedfa(d2);
919     return REG_OKAY;
920     }
921    
922     /*
923     - cbrdissect - determine backref subexpression matches
924     ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
925     */
926     static int /* regexec return code */
927     cbrdissect(v, t, begin, end)
928     struct vars *v;
929     struct subre *t;
930     chr *begin; /* beginning of relevant substring */
931     chr *end; /* end of same */
932     {
933     int i;
934     int n = t->subno;
935     size_t len;
936     chr *paren;
937     chr *p;
938     chr *stop;
939     int min = t->min;
940     int max = t->max;
941    
942     assert(t != NULL);
943     assert(t->op == 'b');
944     assert(n >= 0);
945     assert((size_t)n < v->nmatch);
946    
947     MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
948    
949     if (v->pmatch[n].rm_so == -1)
950     return REG_NOMATCH;
951     paren = v->start + v->pmatch[n].rm_so;
952     len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
953    
954     /* no room to maneuver -- retries are pointless */
955     if (v->mem[t->retry])
956     return REG_NOMATCH;
957     v->mem[t->retry] = 1;
958    
959     /* special-case zero-length string */
960     if (len == 0) {
961     if (begin == end)
962     return REG_OKAY;
963     return REG_NOMATCH;
964     }
965    
966     /* and too-short string */
967     assert(end >= begin);
968     if ((size_t)(end - begin) < len)
969     return REG_NOMATCH;
970     stop = end - len;
971    
972     /* count occurrences */
973     i = 0;
974     for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) {
975     if ((*v->g->compare)(paren, p, len) != 0)
976     break;
977     i++;
978     }
979     MDEBUG(("cbackref found %d\n", i));
980    
981     /* and sort it out */
982     if (p != end) /* didn't consume all of it */
983     return REG_NOMATCH;
984     if (min <= i && (i <= max || max == INFINITY))
985     return REG_OKAY;
986     return REG_NOMATCH; /* out of range */
987     }
988    
989     /*
990     - caltdissect - determine alternative subexpression matches (w. complications)
991     ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *);
992     */
993     static int /* regexec return code */
994     caltdissect(v, t, begin, end)
995     struct vars *v;
996     struct subre *t;
997     chr *begin; /* beginning of relevant substring */
998     chr *end; /* end of same */
999     {
1000     struct dfa *d;
1001     int er;
1002     # define UNTRIED 0 /* not yet tried at all */
1003     # define TRYING 1 /* top matched, trying submatches */
1004     # define TRIED 2 /* top didn't match or submatches exhausted */
1005    
1006     if (t == NULL)
1007     return REG_NOMATCH;
1008     assert(t->op == '|');
1009     if (v->mem[t->retry] == TRIED)
1010     return caltdissect(v, t->right, begin, end);
1011    
1012     MDEBUG(("calt n%d\n", t->retry));
1013     assert(t->left != NULL);
1014    
1015     if (v->mem[t->retry] == UNTRIED) {
1016     d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
1017     if (ISERR())
1018     return v->err;
1019     if (longest(v, d, begin, end, (int *)NULL) != end) {
1020     freedfa(d);
1021     v->mem[t->retry] = TRIED;
1022     return caltdissect(v, t->right, begin, end);
1023     }
1024     freedfa(d);
1025     MDEBUG(("calt matched\n"));
1026     v->mem[t->retry] = TRYING;
1027     }
1028    
1029     er = cdissect(v, t->left, begin, end);
1030     if (er != REG_NOMATCH)
1031     return er;
1032    
1033     v->mem[t->retry] = TRIED;
1034     return caltdissect(v, t->right, begin, end);
1035     }
1036    
1037    
1038     #include "rege_dfa.c"
1039    
1040     /* End of regexec.c */

Properties

Name Value
svn:eol-style native
svn:keywords Header

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25