/[dtapublic]/projs/trunk/shared_source/c_tcl_base_7_5_w_mods/regexec.c
ViewVC logotype

Contents of /projs/trunk/shared_source/c_tcl_base_7_5_w_mods/regexec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 71 - (show annotations) (download)
Sat Nov 5 11:07:06 2016 UTC (7 years, 4 months ago) by dashley
File MIME type: text/plain
File size: 28398 byte(s)
Set EOL properties appropriately to facilitate simultaneous Linux and Windows development.
1 /* $Header$ */
2 /*
3 * re_*exec and friends - match REs
4 *
5 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
6 *
7 * Development of this software was funded, in part, by Cray Research Inc.,
8 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9 * Corporation, none of whom are responsible for the results. The author
10 * thanks all of them.
11 *
12 * Redistribution and use in source and binary forms -- with or without
13 * modification -- are permitted for any purpose, provided that
14 * redistributions in source form retain this entire copyright notice and
15 * indicate the origin and nature of any modifications.
16 *
17 * I'd appreciate being given credit for this package in the documentation
18 * of software which uses it, but that is not a requirement.
19 *
20 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 */
32
33 #include "regguts.h"
34
35
36
37 /* lazy-DFA representation */
38 struct arcp { /* "pointer" to an outarc */
39 struct sset *ss;
40 color co;
41 };
42
43 struct sset { /* state set */
44 unsigned *states; /* pointer to bitvector */
45 unsigned hash; /* hash of bitvector */
46 # define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
47 # define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
48 memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
49 int flags;
50 # define STARTER 01 /* the initial state set */
51 # define POSTSTATE 02 /* includes the goal state */
52 # define LOCKED 04 /* locked in cache */
53 # define NOPROGRESS 010 /* zero-progress state set */
54 struct arcp ins; /* chain of inarcs pointing here */
55 chr *lastseen; /* last entered on arrival here */
56 struct sset **outs; /* outarc vector indexed by color */
57 struct arcp *inchain; /* chain-pointer vector for outarcs */
58 };
59
60 struct dfa {
61 int nssets; /* size of cache */
62 int nssused; /* how many entries occupied yet */
63 int nstates; /* number of states */
64 int ncolors; /* length of outarc and inchain vectors */
65 int wordsper; /* length of state-set bitvectors */
66 struct sset *ssets; /* state-set cache */
67 unsigned *statesarea; /* bitvector storage */
68 unsigned *work; /* pointer to work area within statesarea */
69 struct sset **outsarea; /* outarc-vector storage */
70 struct arcp *incarea; /* inchain storage */
71 struct cnfa *cnfa;
72 struct colormap *cm;
73 chr *lastpost; /* location of last cache-flushed success */
74 chr *lastnopr; /* location of last cache-flushed NOPROGRESS */
75 struct sset *search; /* replacement-search-pointer memory */
76 int cptsmalloced; /* were the areas individually malloced? */
77 char *mallocarea; /* self, or master malloced area, or NULL */
78 };
79
80 #define WORK 1 /* number of work bitvectors needed */
81
82 /* setup for non-malloc allocation for small cases */
83 #define FEWSTATES 20 /* must be less than UBITS */
84 #define FEWCOLORS 15
85 struct smalldfa {
86 struct dfa dfa;
87 struct sset ssets[FEWSTATES*2];
88 unsigned statesarea[FEWSTATES*2 + WORK];
89 struct sset *outsarea[FEWSTATES*2 * FEWCOLORS];
90 struct arcp incarea[FEWSTATES*2 * FEWCOLORS];
91 };
92 #define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */
93
94
95
96 /* internal variables, bundled for easy passing around */
97 struct vars {
98 regex_t *re;
99 struct guts *g;
100 int eflags; /* copies of arguments */
101 size_t nmatch;
102 regmatch_t *pmatch;
103 rm_detail_t *details;
104 chr *start; /* start of string */
105 chr *stop; /* just past end of string */
106 int err; /* error code if any (0 none) */
107 regoff_t *mem; /* memory vector for backtracking */
108 struct smalldfa dfa1;
109 struct smalldfa dfa2;
110 };
111 #define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
112 #define ISERR() VISERR(v)
113 #define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e)))
114 #define ERR(e) VERR(v, e) /* record an error */
115 #define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */
116 #define OFF(p) ((p) - v->start)
117 #define LOFF(p) ((long)OFF(p))
118
119
120
121 /*
122 * forward declarations
123 */
124 /* =====^!^===== begin forwards =====^!^===== */
125 /* automatically gathered by fwd; do not hand-edit */
126 /* === regexec.c === */
127 int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
128 static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
129 static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
130 static int cfindloop _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **));
131 static VOID zapsubs _ANSI_ARGS_((regmatch_t *, size_t));
132 static VOID zapmem _ANSI_ARGS_((struct vars *, struct subre *));
133 static VOID subset _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
134 static int dissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
135 static int condissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
136 static int altdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
137 static int cdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
138 static int ccondissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
139 static int crevdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
140 static int cbrdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
141 static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
142 /* === rege_dfa.c === */
143 static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, int *));
144 static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *));
145 static chr *lastcold _ANSI_ARGS_((struct vars *, struct dfa *));
146 static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *));
147 static VOID freedfa _ANSI_ARGS_((struct dfa *));
148 static unsigned hash _ANSI_ARGS_((unsigned *, int));
149 static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *));
150 static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *));
151 static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor));
152 static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
153 static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
154 /* automatically gathered by fwd; do not hand-edit */
155 /* =====^!^===== end forwards =====^!^===== */
156
157
158
159 /*
160 - exec - match regular expression
161 ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *,
162 ^ size_t, regmatch_t [], int);
163 */
164 int
165 exec(re, string, len, details, nmatch, pmatch, flags)
166 regex_t *re;
167 CONST chr *string;
168 size_t len;
169 rm_detail_t *details;
170 size_t nmatch;
171 regmatch_t pmatch[];
172 int flags;
173 {
174 struct vars var;
175 register struct vars *v = &var;
176 int st;
177 size_t n;
178 int backref;
179 # define LOCALMAT 20
180 regmatch_t mat[LOCALMAT];
181 # define LOCALMEM 40
182 regoff_t mem[LOCALMEM];
183
184 /* sanity checks */
185 if (re == NULL || string == NULL || re->re_magic != REMAGIC)
186 return REG_INVARG;
187 if (re->re_csize != sizeof(chr))
188 return REG_MIXED;
189
190 /* setup */
191 v->re = re;
192 v->g = (struct guts *)re->re_guts;
193 if ((v->g->cflags&REG_EXPECT) && details == NULL)
194 return REG_INVARG;
195 if (v->g->info&REG_UIMPOSSIBLE)
196 return REG_NOMATCH;
197 backref = (v->g->info&REG_UBACKREF) ? 1 : 0;
198 v->eflags = flags;
199 if (v->g->cflags&REG_NOSUB)
200 nmatch = 0; /* override client */
201 v->nmatch = nmatch;
202 if (backref) {
203 /* need work area */
204 if (v->g->nsub + 1 <= LOCALMAT)
205 v->pmatch = mat;
206 else
207 v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) *
208 sizeof(regmatch_t));
209 if (v->pmatch == NULL)
210 return REG_ESPACE;
211 v->nmatch = v->g->nsub + 1;
212 } else
213 v->pmatch = pmatch;
214 v->details = details;
215 v->start = (chr *)string;
216 v->stop = (chr *)string + len;
217 v->err = 0;
218 if (backref) {
219 /* need retry memory */
220 assert(v->g->ntree >= 0);
221 n = (size_t)v->g->ntree;
222 if (n <= LOCALMEM)
223 v->mem = mem;
224 else
225 v->mem = (regoff_t *)MALLOC(n*sizeof(regoff_t));
226 if (v->mem == NULL) {
227 if (v->pmatch != pmatch && v->pmatch != mat)
228 FREE(v->pmatch);
229 return REG_ESPACE;
230 }
231 } else
232 v->mem = NULL;
233
234 /* do it */
235 assert(v->g->tree != NULL);
236 if (backref)
237 st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
238 else
239 st = find(v, &v->g->tree->cnfa, &v->g->cmap);
240
241 /* copy (portion of) match vector over if necessary */
242 if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
243 zapsubs(pmatch, nmatch);
244 n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
245 memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
246 }
247
248 /* clean up */
249 if (v->pmatch != pmatch && v->pmatch != mat)
250 FREE(v->pmatch);
251 if (v->mem != NULL && v->mem != mem)
252 FREE(v->mem);
253 return st;
254 }
255
256 /*
257 - find - find a match for the main NFA (no-complications case)
258 ^ static int find(struct vars *, struct cnfa *, struct colormap *);
259 */
260 static int
261 find(v, cnfa, cm)
262 struct vars *v;
263 struct cnfa *cnfa;
264 struct colormap *cm;
265 {
266 struct dfa *s;
267 struct dfa *d;
268 chr *begin;
269 chr *end = NULL;
270 chr *cold;
271 chr *open; /* open and close of range of possible starts */
272 chr *close;
273 int hitend;
274 int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0;
275
276 /* first, a shot with the search RE */
277 s = newdfa(v, &v->g->search, cm, &v->dfa1);
278 assert(!(ISERR() && s != NULL));
279 NOERR();
280 MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
281 cold = NULL;
282 close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)NULL);
283 freedfa(s);
284 NOERR();
285 if (v->g->cflags&REG_EXPECT) {
286 assert(v->details != NULL);
287 if (cold != NULL)
288 v->details->rm_extend.rm_so = OFF(cold);
289 else
290 v->details->rm_extend.rm_so = OFF(v->stop);
291 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
292 }
293 if (close == NULL) /* not found */
294 return REG_NOMATCH;
295 if (v->nmatch == 0) /* found, don't need exact location */
296 return REG_OKAY;
297
298 /* find starting point and match */
299 assert(cold != NULL);
300 open = cold;
301 cold = NULL;
302 MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
303 d = newdfa(v, cnfa, cm, &v->dfa1);
304 assert(!(ISERR() && d != NULL));
305 NOERR();
306 for (begin = open; begin <= close; begin++) {
307 MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
308 if (shorter)
309 end = shortest(v, d, begin, begin, v->stop,
310 (chr **)NULL, &hitend);
311 else
312 end = longest(v, d, begin, v->stop, &hitend);
313 NOERR();
314 if (hitend && cold == NULL)
315 cold = begin;
316 if (end != NULL)
317 break; /* NOTE BREAK OUT */
318 }
319 assert(end != NULL); /* search RE succeeded so loop should */
320 freedfa(d);
321
322 /* and pin down details */
323 assert(v->nmatch > 0);
324 v->pmatch[0].rm_so = OFF(begin);
325 v->pmatch[0].rm_eo = OFF(end);
326 if (v->g->cflags&REG_EXPECT) {
327 if (cold != NULL)
328 v->details->rm_extend.rm_so = OFF(cold);
329 else
330 v->details->rm_extend.rm_so = OFF(v->stop);
331 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
332 }
333 if (v->nmatch == 1) /* no need for submatches */
334 return REG_OKAY;
335
336 /* submatches */
337 zapsubs(v->pmatch, v->nmatch);
338 return dissect(v, v->g->tree, begin, end);
339 }
340
341 /*
342 - cfind - find a match for the main NFA (with complications)
343 ^ static int cfind(struct vars *, struct cnfa *, struct colormap *);
344 */
345 static int
346 cfind(v, cnfa, cm)
347 struct vars *v;
348 struct cnfa *cnfa;
349 struct colormap *cm;
350 {
351 struct dfa *s;
352 struct dfa *d;
353 chr *cold;
354 int ret;
355
356 s = newdfa(v, &v->g->search, cm, &v->dfa1);
357 NOERR();
358 d = newdfa(v, cnfa, cm, &v->dfa2);
359 if (ISERR()) {
360 assert(d == NULL);
361 freedfa(s);
362 return v->err;
363 }
364
365 ret = cfindloop(v, cnfa, cm, d, s, &cold);
366
367 freedfa(d);
368 freedfa(s);
369 NOERR();
370 if (v->g->cflags&REG_EXPECT) {
371 assert(v->details != NULL);
372 if (cold != NULL)
373 v->details->rm_extend.rm_so = OFF(cold);
374 else
375 v->details->rm_extend.rm_so = OFF(v->stop);
376 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
377 }
378 return ret;
379 }
380
381 /*
382 - cfindloop - the heart of cfind
383 ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *,
384 ^ struct dfa *, struct dfa *, chr **);
385 */
386 static int
387 cfindloop(v, cnfa, cm, d, s, coldp)
388 struct vars *v;
389 struct cnfa *cnfa;
390 struct colormap *cm;
391 struct dfa *d;
392 struct dfa *s;
393 chr **coldp; /* where to put coldstart pointer */
394 {
395 chr *begin;
396 chr *end;
397 chr *cold;
398 chr *open; /* open and close of range of possible starts */
399 chr *close;
400 chr *estart;
401 chr *estop;
402 int er;
403 int shorter = v->g->tree->flags&SHORTER;
404 int hitend;
405
406 assert(d != NULL && s != NULL);
407 cold = NULL;
408 close = v->start;
409 do {
410 MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
411 close = shortest(v, s, close, close, v->stop, &cold, (int *)NULL);
412 if (close == NULL)
413 break; /* NOTE BREAK */
414 assert(cold != NULL);
415 open = cold;
416 cold = NULL;
417 MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
418 for (begin = open; begin <= close; begin++) {
419 MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
420 estart = begin;
421 estop = v->stop;
422 for (;;) {
423 if (shorter)
424 end = shortest(v, d, begin, estart,
425 estop, (chr **)NULL, &hitend);
426 else
427 end = longest(v, d, begin, estop,
428 &hitend);
429 if (hitend && cold == NULL)
430 cold = begin;
431 if (end == NULL)
432 break; /* NOTE BREAK OUT */
433 MDEBUG(("tentative end %ld\n", LOFF(end)));
434 zapsubs(v->pmatch, v->nmatch);
435 zapmem(v, v->g->tree);
436 er = cdissect(v, v->g->tree, begin, end);
437 if (er == REG_OKAY) {
438 if (v->nmatch > 0) {
439 v->pmatch[0].rm_so = OFF(begin);
440 v->pmatch[0].rm_eo = OFF(end);
441 }
442 *coldp = cold;
443 return REG_OKAY;
444 }
445 if (er != REG_NOMATCH) {
446 ERR(er);
447 return er;
448 }
449 if ((shorter) ? end == estop : end == begin) {
450 /* no point in trying again */
451 *coldp = cold;
452 return REG_NOMATCH;
453 }
454 /* go around and try again */
455 if (shorter)
456 estart = end + 1;
457 else
458 estop = end - 1;
459 }
460 }
461 } while (close < v->stop);
462
463 *coldp = cold;
464 return REG_NOMATCH;
465 }
466
467 /*
468 - zapsubs - initialize the subexpression matches to "no match"
469 ^ static VOID zapsubs(regmatch_t *, size_t);
470 */
471 static VOID
472 zapsubs(p, n)
473 regmatch_t *p;
474 size_t n;
475 {
476 size_t i;
477
478 for (i = n-1; i > 0; i--) {
479 p[i].rm_so = -1;
480 p[i].rm_eo = -1;
481 }
482 }
483
484 /*
485 - zapmem - initialize the retry memory of a subtree to zeros
486 ^ static VOID zapmem(struct vars *, struct subre *);
487 */
488 static VOID
489 zapmem(v, t)
490 struct vars *v;
491 struct subre *t;
492 {
493 if (t == NULL)
494 return;
495
496 assert(v->mem != NULL);
497 v->mem[t->retry] = 0;
498 if (t->op == '(') {
499 assert(t->subno > 0);
500 v->pmatch[t->subno].rm_so = -1;
501 v->pmatch[t->subno].rm_eo = -1;
502 }
503
504 if (t->left != NULL)
505 zapmem(v, t->left);
506 if (t->right != NULL)
507 zapmem(v, t->right);
508 }
509
510 /*
511 - subset - set any subexpression relevant to a successful subre
512 ^ static VOID subset(struct vars *, struct subre *, chr *, chr *);
513 */
514 static VOID
515 subset(v, sub, begin, end)
516 struct vars *v;
517 struct subre *sub;
518 chr *begin;
519 chr *end;
520 {
521 int n = sub->subno;
522
523 assert(n > 0);
524 if ((size_t)n >= v->nmatch)
525 return;
526
527 MDEBUG(("setting %d\n", n));
528 v->pmatch[n].rm_so = OFF(begin);
529 v->pmatch[n].rm_eo = OFF(end);
530 }
531
532 /*
533 - dissect - determine subexpression matches (uncomplicated case)
534 ^ static int dissect(struct vars *, struct subre *, chr *, chr *);
535 */
536 static int /* regexec return code */
537 dissect(v, t, begin, end)
538 struct vars *v;
539 struct subre *t;
540 chr *begin; /* beginning of relevant substring */
541 chr *end; /* end of same */
542 {
543 assert(t != NULL);
544 MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));
545
546 switch (t->op) {
547 case '=': /* terminal node */
548 assert(t->left == NULL && t->right == NULL);
549 return REG_OKAY; /* no action, parent did the work */
550 break;
551 case '|': /* alternation */
552 assert(t->left != NULL);
553 return altdissect(v, t, begin, end);
554 break;
555 case 'b': /* back ref -- shouldn't be calling us! */
556 return REG_ASSERT;
557 break;
558 case '.': /* concatenation */
559 assert(t->left != NULL && t->right != NULL);
560 return condissect(v, t, begin, end);
561 break;
562 case '(': /* capturing */
563 assert(t->left != NULL && t->right == NULL);
564 assert(t->subno > 0);
565 subset(v, t, begin, end);
566 return dissect(v, t->left, begin, end);
567 break;
568 default:
569 return REG_ASSERT;
570 break;
571 }
572 }
573
574 /*
575 - condissect - determine concatenation subexpression matches (uncomplicated)
576 ^ static int condissect(struct vars *, struct subre *, chr *, chr *);
577 */
578 static int /* regexec return code */
579 condissect(v, t, begin, end)
580 struct vars *v;
581 struct subre *t;
582 chr *begin; /* beginning of relevant substring */
583 chr *end; /* end of same */
584 {
585 struct dfa *d;
586 struct dfa *d2;
587 chr *mid;
588 int i;
589 int shorter = (t->left->flags&SHORTER) ? 1 : 0;
590 chr *stop = (shorter) ? end : begin;
591
592 assert(t->op == '.');
593 assert(t->left != NULL && t->left->cnfa.nstates > 0);
594 assert(t->right != NULL && t->right->cnfa.nstates > 0);
595
596 d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
597 NOERR();
598 d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);
599 if (ISERR()) {
600 assert(d2 == NULL);
601 freedfa(d);
602 return v->err;
603 }
604
605 /* pick a tentative midpoint */
606 if (shorter)
607 mid = shortest(v, d, begin, begin, end, (chr **)NULL,
608 (int *)NULL);
609 else
610 mid = longest(v, d, begin, end, (int *)NULL);
611 if (mid == NULL) {
612 freedfa(d);
613 freedfa(d2);
614 return REG_ASSERT;
615 }
616 MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
617
618 /* iterate until satisfaction or failure */
619 while (longest(v, d2, mid, end, (int *)NULL) != end) {
620 /* that midpoint didn't work, find a new one */
621 if (mid == stop) {
622 /* all possibilities exhausted! */
623 MDEBUG(("no midpoint!\n"));
624 freedfa(d);
625 freedfa(d2);
626 return REG_ASSERT;
627 }
628 if (shorter)
629 mid = shortest(v, d, begin, mid+1, end, (chr **)NULL,
630 (int *)NULL);
631 else
632 mid = longest(v, d, begin, mid-1, (int *)NULL);
633 if (mid == NULL) {
634 /* failed to find a new one! */
635 MDEBUG(("failed midpoint!\n"));
636 freedfa(d);
637 freedfa(d2);
638 return REG_ASSERT;
639 }
640 MDEBUG(("new midpoint %ld\n", LOFF(mid)));
641 }
642
643 /* satisfaction */
644 MDEBUG(("successful\n"));
645 freedfa(d);
646 freedfa(d2);
647 i = dissect(v, t->left, begin, mid);
648 if (i != REG_OKAY)
649 return i;
650 return dissect(v, t->right, mid, end);
651 }
652
653 /*
654 - altdissect - determine alternative subexpression matches (uncomplicated)
655 ^ static int altdissect(struct vars *, struct subre *, chr *, chr *);
656 */
657 static int /* regexec return code */
658 altdissect(v, t, begin, end)
659 struct vars *v;
660 struct subre *t;
661 chr *begin; /* beginning of relevant substring */
662 chr *end; /* end of same */
663 {
664 struct dfa *d;
665 int i;
666
667 assert(t != NULL);
668 assert(t->op == '|');
669
670 for (i = 0; t != NULL; t = t->right, i++) {
671 MDEBUG(("trying %dth\n", i));
672 assert(t->left != NULL && t->left->cnfa.nstates > 0);
673 d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
674 if (ISERR())
675 return v->err;
676 if (longest(v, d, begin, end, (int *)NULL) == end) {
677 MDEBUG(("success\n"));
678 freedfa(d);
679 return dissect(v, t->left, begin, end);
680 }
681 freedfa(d);
682 }
683 return REG_ASSERT; /* none of them matched?!? */
684 }
685
686 /*
687 - cdissect - determine subexpression matches (with complications)
688 * The retry memory stores the offset of the trial midpoint from begin,
689 * plus 1 so that 0 uniquely means "clean slate".
690 ^ static int cdissect(struct vars *, struct subre *, chr *, chr *);
691 */
692 static int /* regexec return code */
693 cdissect(v, t, begin, end)
694 struct vars *v;
695 struct subre *t;
696 chr *begin; /* beginning of relevant substring */
697 chr *end; /* end of same */
698 {
699 int er;
700
701 assert(t != NULL);
702 MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
703
704 switch (t->op) {
705 case '=': /* terminal node */
706 assert(t->left == NULL && t->right == NULL);
707 return REG_OKAY; /* no action, parent did the work */
708 break;
709 case '|': /* alternation */
710 assert(t->left != NULL);
711 return caltdissect(v, t, begin, end);
712 break;
713 case 'b': /* back ref -- shouldn't be calling us! */
714 assert(t->left == NULL && t->right == NULL);
715 return cbrdissect(v, t, begin, end);
716 break;
717 case '.': /* concatenation */
718 assert(t->left != NULL && t->right != NULL);
719 return ccondissect(v, t, begin, end);
720 break;
721 case '(': /* capturing */
722 assert(t->left != NULL && t->right == NULL);
723 assert(t->subno > 0);
724 er = cdissect(v, t->left, begin, end);
725 if (er == REG_OKAY)
726 subset(v, t, begin, end);
727 return er;
728 break;
729 default:
730 return REG_ASSERT;
731 break;
732 }
733 }
734
735 /*
736 - ccondissect - concatenation subexpression matches (with complications)
737 * The retry memory stores the offset of the trial midpoint from begin,
738 * plus 1 so that 0 uniquely means "clean slate".
739 ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *);
740 */
741 static int /* regexec return code */
742 ccondissect(v, t, begin, end)
743 struct vars *v;
744 struct subre *t;
745 chr *begin; /* beginning of relevant substring */
746 chr *end; /* end of same */
747 {
748 struct dfa *d;
749 struct dfa *d2;
750 chr *mid;
751 int er;
752
753 assert(t->op == '.');
754 assert(t->left != NULL && t->left->cnfa.nstates > 0);
755 assert(t->right != NULL && t->right->cnfa.nstates > 0);
756
757 if (t->left->flags&SHORTER) /* reverse scan */
758 return crevdissect(v, t, begin, end);
759
760 d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
761 if (ISERR())
762 return v->err;
763 d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
764 if (ISERR()) {
765 freedfa(d);
766 return v->err;
767 }
768 MDEBUG(("cconcat %d\n", t->retry));
769
770 /* pick a tentative midpoint */
771 if (v->mem[t->retry] == 0) {
772 mid = longest(v, d, begin, end, (int *)NULL);
773 if (mid == NULL) {
774 freedfa(d);
775 freedfa(d2);
776 return REG_NOMATCH;
777 }
778 MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
779 v->mem[t->retry] = (mid - begin) + 1;
780 } else {
781 mid = begin + (v->mem[t->retry] - 1);
782 MDEBUG(("working midpoint %ld\n", LOFF(mid)));
783 }
784
785 /* iterate until satisfaction or failure */
786 for (;;) {
787 /* try this midpoint on for size */
788 er = cdissect(v, t->left, begin, mid);
789 if (er == REG_OKAY &&
790 longest(v, d2, mid, end, (int *)NULL) == end &&
791 (er = cdissect(v, t->right, mid, end)) ==
792 REG_OKAY)
793 break; /* NOTE BREAK OUT */
794 if (er != REG_OKAY && er != REG_NOMATCH) {
795 freedfa(d);
796 freedfa(d2);
797 return er;
798 }
799
800 /* that midpoint didn't work, find a new one */
801 if (mid == begin) {
802 /* all possibilities exhausted */
803 MDEBUG(("%d no midpoint\n", t->retry));
804 freedfa(d);
805 freedfa(d2);
806 return REG_NOMATCH;
807 }
808 mid = longest(v, d, begin, mid-1, (int *)NULL);
809 if (mid == NULL) {
810 /* failed to find a new one */
811 MDEBUG(("%d failed midpoint\n", t->retry));
812 freedfa(d);
813 freedfa(d2);
814 return REG_NOMATCH;
815 }
816 MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
817 v->mem[t->retry] = (mid - begin) + 1;
818 zapmem(v, t->left);
819 zapmem(v, t->right);
820 }
821
822 /* satisfaction */
823 MDEBUG(("successful\n"));
824 freedfa(d);
825 freedfa(d2);
826 return REG_OKAY;
827 }
828
829 /*
830 - crevdissect - determine backref shortest-first subexpression matches
831 * The retry memory stores the offset of the trial midpoint from begin,
832 * plus 1 so that 0 uniquely means "clean slate".
833 ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *);
834 */
835 static int /* regexec return code */
836 crevdissect(v, t, begin, end)
837 struct vars *v;
838 struct subre *t;
839 chr *begin; /* beginning of relevant substring */
840 chr *end; /* end of same */
841 {
842 struct dfa *d;
843 struct dfa *d2;
844 chr *mid;
845 int er;
846
847 assert(t->op == '.');
848 assert(t->left != NULL && t->left->cnfa.nstates > 0);
849 assert(t->right != NULL && t->right->cnfa.nstates > 0);
850 assert(t->left->flags&SHORTER);
851
852 /* concatenation -- need to split the substring between parts */
853 d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
854 if (ISERR())
855 return v->err;
856 d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
857 if (ISERR()) {
858 freedfa(d);
859 return v->err;
860 }
861 MDEBUG(("crev %d\n", t->retry));
862
863 /* pick a tentative midpoint */
864 if (v->mem[t->retry] == 0) {
865 mid = shortest(v, d, begin, begin, end, (chr **)NULL, (int *)NULL);
866 if (mid == NULL) {
867 freedfa(d);
868 freedfa(d2);
869 return REG_NOMATCH;
870 }
871 MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
872 v->mem[t->retry] = (mid - begin) + 1;
873 } else {
874 mid = begin + (v->mem[t->retry] - 1);
875 MDEBUG(("working midpoint %ld\n", LOFF(mid)));
876 }
877
878 /* iterate until satisfaction or failure */
879 for (;;) {
880 /* try this midpoint on for size */
881 er = cdissect(v, t->left, begin, mid);
882 if (er == REG_OKAY &&
883 longest(v, d2, mid, end, (int *)NULL) == end &&
884 (er = cdissect(v, t->right, mid, end)) ==
885 REG_OKAY)
886 break; /* NOTE BREAK OUT */
887 if (er != REG_OKAY && er != REG_NOMATCH) {
888 freedfa(d);
889 freedfa(d2);
890 return er;
891 }
892
893 /* that midpoint didn't work, find a new one */
894 if (mid == end) {
895 /* all possibilities exhausted */
896 MDEBUG(("%d no midpoint\n", t->retry));
897 freedfa(d);
898 freedfa(d2);
899 return REG_NOMATCH;
900 }
901 mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, (int *)NULL);
902 if (mid == NULL) {
903 /* failed to find a new one */
904 MDEBUG(("%d failed midpoint\n", t->retry));
905 freedfa(d);
906 freedfa(d2);
907 return REG_NOMATCH;
908 }
909 MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
910 v->mem[t->retry] = (mid - begin) + 1;
911 zapmem(v, t->left);
912 zapmem(v, t->right);
913 }
914
915 /* satisfaction */
916 MDEBUG(("successful\n"));
917 freedfa(d);
918 freedfa(d2);
919 return REG_OKAY;
920 }
921
922 /*
923 - cbrdissect - determine backref subexpression matches
924 ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
925 */
926 static int /* regexec return code */
927 cbrdissect(v, t, begin, end)
928 struct vars *v;
929 struct subre *t;
930 chr *begin; /* beginning of relevant substring */
931 chr *end; /* end of same */
932 {
933 int i;
934 int n = t->subno;
935 size_t len;
936 chr *paren;
937 chr *p;
938 chr *stop;
939 int min = t->min;
940 int max = t->max;
941
942 assert(t != NULL);
943 assert(t->op == 'b');
944 assert(n >= 0);
945 assert((size_t)n < v->nmatch);
946
947 MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
948
949 if (v->pmatch[n].rm_so == -1)
950 return REG_NOMATCH;
951 paren = v->start + v->pmatch[n].rm_so;
952 len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
953
954 /* no room to maneuver -- retries are pointless */
955 if (v->mem[t->retry])
956 return REG_NOMATCH;
957 v->mem[t->retry] = 1;
958
959 /* special-case zero-length string */
960 if (len == 0) {
961 if (begin == end)
962 return REG_OKAY;
963 return REG_NOMATCH;
964 }
965
966 /* and too-short string */
967 assert(end >= begin);
968 if ((size_t)(end - begin) < len)
969 return REG_NOMATCH;
970 stop = end - len;
971
972 /* count occurrences */
973 i = 0;
974 for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) {
975 if ((*v->g->compare)(paren, p, len) != 0)
976 break;
977 i++;
978 }
979 MDEBUG(("cbackref found %d\n", i));
980
981 /* and sort it out */
982 if (p != end) /* didn't consume all of it */
983 return REG_NOMATCH;
984 if (min <= i && (i <= max || max == INFINITY))
985 return REG_OKAY;
986 return REG_NOMATCH; /* out of range */
987 }
988
989 /*
990 - caltdissect - determine alternative subexpression matches (w. complications)
991 ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *);
992 */
993 static int /* regexec return code */
994 caltdissect(v, t, begin, end)
995 struct vars *v;
996 struct subre *t;
997 chr *begin; /* beginning of relevant substring */
998 chr *end; /* end of same */
999 {
1000 struct dfa *d;
1001 int er;
1002 # define UNTRIED 0 /* not yet tried at all */
1003 # define TRYING 1 /* top matched, trying submatches */
1004 # define TRIED 2 /* top didn't match or submatches exhausted */
1005
1006 if (t == NULL)
1007 return REG_NOMATCH;
1008 assert(t->op == '|');
1009 if (v->mem[t->retry] == TRIED)
1010 return caltdissect(v, t->right, begin, end);
1011
1012 MDEBUG(("calt n%d\n", t->retry));
1013 assert(t->left != NULL);
1014
1015 if (v->mem[t->retry] == UNTRIED) {
1016 d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
1017 if (ISERR())
1018 return v->err;
1019 if (longest(v, d, begin, end, (int *)NULL) != end) {
1020 freedfa(d);
1021 v->mem[t->retry] = TRIED;
1022 return caltdissect(v, t->right, begin, end);
1023 }
1024 freedfa(d);
1025 MDEBUG(("calt matched\n"));
1026 v->mem[t->retry] = TRYING;
1027 }
1028
1029 er = cdissect(v, t->left, begin, end);
1030 if (er != REG_NOMATCH)
1031 return er;
1032
1033 v->mem[t->retry] = TRIED;
1034 return caltdissect(v, t->right, begin, end);
1035 }
1036
1037
1038 #include "rege_dfa.c"
1039
1040 /* End of regexec.c */

Properties

Name Value
svn:eol-style native
svn:keywords Header

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25