/[dtapublic]/projs/trunk/shared_source/tcl_base/regexec.c
ViewVC logotype

Contents of /projs/trunk/shared_source/tcl_base/regexec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 42 - (show annotations) (download)
Fri Oct 14 01:50:00 2016 UTC (8 years, 2 months ago) by dashley
File MIME type: text/plain
File size: 29764 byte(s)
Move shared source code to commonize.
1 /* $Header: /cvsroot/esrg/sfesrg/esrgpcpj/shared/tcl_base/regexec.c,v 1.1.1.1 2001/06/13 04:32:26 dtashley Exp $ */
2
3 /*
4 * re_*exec and friends - match REs
5 *
6 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
7 *
8 * Development of this software was funded, in part, by Cray Research Inc.,
9 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
10 * Corporation, none of whom are responsible for the results. The author
11 * thanks all of them.
12 *
13 * Redistribution and use in source and binary forms -- with or without
14 * modification -- are permitted for any purpose, provided that
15 * redistributions in source form retain this entire copyright notice and
16 * indicate the origin and nature of any modifications.
17 *
18 * I'd appreciate being given credit for this package in the documentation
19 * of software which uses it, but that is not a requirement.
20 *
21 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
22 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
23 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
24 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
27 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
30 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 */
33
34 #include "regguts.h"
35
36
37
38 /* lazy-DFA representation */
39 struct arcp { /* "pointer" to an outarc */
40 struct sset *ss;
41 color co;
42 };
43
44 struct sset { /* state set */
45 unsigned *states; /* pointer to bitvector */
46 unsigned hash; /* hash of bitvector */
47 # define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
48 # define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
49 memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
50 int flags;
51 # define STARTER 01 /* the initial state set */
52 # define POSTSTATE 02 /* includes the goal state */
53 # define LOCKED 04 /* locked in cache */
54 # define NOPROGRESS 010 /* zero-progress state set */
55 struct arcp ins; /* chain of inarcs pointing here */
56 chr *lastseen; /* last entered on arrival here */
57 struct sset **outs; /* outarc vector indexed by color */
58 struct arcp *inchain; /* chain-pointer vector for outarcs */
59 };
60
61 struct dfa {
62 int nssets; /* size of cache */
63 int nssused; /* how many entries occupied yet */
64 int nstates; /* number of states */
65 int ncolors; /* length of outarc and inchain vectors */
66 int wordsper; /* length of state-set bitvectors */
67 struct sset *ssets; /* state-set cache */
68 unsigned *statesarea; /* bitvector storage */
69 unsigned *work; /* pointer to work area within statesarea */
70 struct sset **outsarea; /* outarc-vector storage */
71 struct arcp *incarea; /* inchain storage */
72 struct cnfa *cnfa;
73 struct colormap *cm;
74 chr *lastpost; /* location of last cache-flushed success */
75 chr *lastnopr; /* location of last cache-flushed NOPROGRESS */
76 struct sset *search; /* replacement-search-pointer memory */
77 int cptsmalloced; /* were the areas individually malloced? */
78 char *mallocarea; /* self, or master malloced area, or NULL */
79 };
80
81 #define WORK 1 /* number of work bitvectors needed */
82
83 /* setup for non-malloc allocation for small cases */
84 #define FEWSTATES 20 /* must be less than UBITS */
85 #define FEWCOLORS 15
86 struct smalldfa {
87 struct dfa dfa;
88 struct sset ssets[FEWSTATES*2];
89 unsigned statesarea[FEWSTATES*2 + WORK];
90 struct sset *outsarea[FEWSTATES*2 * FEWCOLORS];
91 struct arcp incarea[FEWSTATES*2 * FEWCOLORS];
92 };
93 #define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */
94
95
96
97 /* internal variables, bundled for easy passing around */
98 struct vars {
99 regex_t *re;
100 struct guts *g;
101 int eflags; /* copies of arguments */
102 size_t nmatch;
103 regmatch_t *pmatch;
104 rm_detail_t *details;
105 chr *start; /* start of string */
106 chr *stop; /* just past end of string */
107 int err; /* error code if any (0 none) */
108 regoff_t *mem; /* memory vector for backtracking */
109 struct smalldfa dfa1;
110 struct smalldfa dfa2;
111 };
112 #define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
113 #define ISERR() VISERR(v)
114 #define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e)))
115 #define ERR(e) VERR(v, e) /* record an error */
116 #define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */
117 #define OFF(p) ((p) - v->start)
118 #define LOFF(p) ((long)OFF(p))
119
120
121
122 /*
123 * forward declarations
124 */
125 /* =====^!^===== begin forwards =====^!^===== */
126 /* automatically gathered by fwd; do not hand-edit */
127 /* === regexec.c === */
128 int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
129 static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
130 static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
131 static int cfindloop _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **));
132 static VOID zapsubs _ANSI_ARGS_((regmatch_t *, size_t));
133 static VOID zapmem _ANSI_ARGS_((struct vars *, struct subre *));
134 static VOID subset _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
135 static int dissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
136 static int condissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
137 static int altdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
138 static int cdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
139 static int ccondissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
140 static int crevdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
141 static int cbrdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
142 static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
143 /* === rege_dfa.c === */
144 static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, int *));
145 static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *));
146 static chr *lastcold _ANSI_ARGS_((struct vars *, struct dfa *));
147 static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *));
148 static VOID freedfa _ANSI_ARGS_((struct dfa *));
149 static unsigned hash _ANSI_ARGS_((unsigned *, int));
150 static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *));
151 static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *));
152 static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor));
153 static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
154 static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
155 /* automatically gathered by fwd; do not hand-edit */
156 /* =====^!^===== end forwards =====^!^===== */
157
158
159
160 /*
161 - exec - match regular expression
162 ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *,
163 ^ size_t, regmatch_t [], int);
164 */
165 int
166 exec(re, string, len, details, nmatch, pmatch, flags)
167 regex_t *re;
168 CONST chr *string;
169 size_t len;
170 rm_detail_t *details;
171 size_t nmatch;
172 regmatch_t pmatch[];
173 int flags;
174 {
175 struct vars var;
176 register struct vars *v = &var;
177 int st;
178 size_t n;
179 int backref;
180 # define LOCALMAT 20
181 regmatch_t mat[LOCALMAT];
182 # define LOCALMEM 40
183 regoff_t mem[LOCALMEM];
184
185 /* sanity checks */
186 if (re == NULL || string == NULL || re->re_magic != REMAGIC)
187 return REG_INVARG;
188 if (re->re_csize != sizeof(chr))
189 return REG_MIXED;
190
191 /* setup */
192 v->re = re;
193 v->g = (struct guts *)re->re_guts;
194 if ((v->g->cflags&REG_EXPECT) && details == NULL)
195 return REG_INVARG;
196 if (v->g->info&REG_UIMPOSSIBLE)
197 return REG_NOMATCH;
198 backref = (v->g->info&REG_UBACKREF) ? 1 : 0;
199 v->eflags = flags;
200 if (v->g->cflags&REG_NOSUB)
201 nmatch = 0; /* override client */
202 v->nmatch = nmatch;
203 if (backref) {
204 /* need work area */
205 if (v->g->nsub + 1 <= LOCALMAT)
206 v->pmatch = mat;
207 else
208 v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) *
209 sizeof(regmatch_t));
210 if (v->pmatch == NULL)
211 return REG_ESPACE;
212 v->nmatch = v->g->nsub + 1;
213 } else
214 v->pmatch = pmatch;
215 v->details = details;
216 v->start = (chr *)string;
217 v->stop = (chr *)string + len;
218 v->err = 0;
219 if (backref) {
220 /* need retry memory */
221 assert(v->g->ntree >= 0);
222 n = (size_t)v->g->ntree;
223 if (n <= LOCALMEM)
224 v->mem = mem;
225 else
226 v->mem = (regoff_t *)MALLOC(n*sizeof(regoff_t));
227 if (v->mem == NULL) {
228 if (v->pmatch != pmatch && v->pmatch != mat)
229 FREE(v->pmatch);
230 return REG_ESPACE;
231 }
232 } else
233 v->mem = NULL;
234
235 /* do it */
236 assert(v->g->tree != NULL);
237 if (backref)
238 st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
239 else
240 st = find(v, &v->g->tree->cnfa, &v->g->cmap);
241
242 /* copy (portion of) match vector over if necessary */
243 if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
244 zapsubs(pmatch, nmatch);
245 n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
246 memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
247 }
248
249 /* clean up */
250 if (v->pmatch != pmatch && v->pmatch != mat)
251 FREE(v->pmatch);
252 if (v->mem != NULL && v->mem != mem)
253 FREE(v->mem);
254 return st;
255 }
256
257 /*
258 - find - find a match for the main NFA (no-complications case)
259 ^ static int find(struct vars *, struct cnfa *, struct colormap *);
260 */
261 static int
262 find(v, cnfa, cm)
263 struct vars *v;
264 struct cnfa *cnfa;
265 struct colormap *cm;
266 {
267 struct dfa *s;
268 struct dfa *d;
269 chr *begin;
270 chr *end = NULL;
271 chr *cold;
272 chr *open; /* open and close of range of possible starts */
273 chr *close;
274 int hitend;
275 int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0;
276
277 /* first, a shot with the search RE */
278 s = newdfa(v, &v->g->search, cm, &v->dfa1);
279 assert(!(ISERR() && s != NULL));
280 NOERR();
281 MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
282 cold = NULL;
283 close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)NULL);
284 freedfa(s);
285 NOERR();
286 if (v->g->cflags&REG_EXPECT) {
287 assert(v->details != NULL);
288 if (cold != NULL)
289 v->details->rm_extend.rm_so = OFF(cold);
290 else
291 v->details->rm_extend.rm_so = OFF(v->stop);
292 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
293 }
294 if (close == NULL) /* not found */
295 return REG_NOMATCH;
296 if (v->nmatch == 0) /* found, don't need exact location */
297 return REG_OKAY;
298
299 /* find starting point and match */
300 assert(cold != NULL);
301 open = cold;
302 cold = NULL;
303 MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
304 d = newdfa(v, cnfa, cm, &v->dfa1);
305 assert(!(ISERR() && d != NULL));
306 NOERR();
307 for (begin = open; begin <= close; begin++) {
308 MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
309 if (shorter)
310 end = shortest(v, d, begin, begin, v->stop,
311 (chr **)NULL, &hitend);
312 else
313 end = longest(v, d, begin, v->stop, &hitend);
314 NOERR();
315 if (hitend && cold == NULL)
316 cold = begin;
317 if (end != NULL)
318 break; /* NOTE BREAK OUT */
319 }
320 assert(end != NULL); /* search RE succeeded so loop should */
321 freedfa(d);
322
323 /* and pin down details */
324 assert(v->nmatch > 0);
325 v->pmatch[0].rm_so = OFF(begin);
326 v->pmatch[0].rm_eo = OFF(end);
327 if (v->g->cflags&REG_EXPECT) {
328 if (cold != NULL)
329 v->details->rm_extend.rm_so = OFF(cold);
330 else
331 v->details->rm_extend.rm_so = OFF(v->stop);
332 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
333 }
334 if (v->nmatch == 1) /* no need for submatches */
335 return REG_OKAY;
336
337 /* submatches */
338 zapsubs(v->pmatch, v->nmatch);
339 return dissect(v, v->g->tree, begin, end);
340 }
341
342 /*
343 - cfind - find a match for the main NFA (with complications)
344 ^ static int cfind(struct vars *, struct cnfa *, struct colormap *);
345 */
346 static int
347 cfind(v, cnfa, cm)
348 struct vars *v;
349 struct cnfa *cnfa;
350 struct colormap *cm;
351 {
352 struct dfa *s;
353 struct dfa *d;
354 chr *cold;
355 int ret;
356
357 s = newdfa(v, &v->g->search, cm, &v->dfa1);
358 NOERR();
359 d = newdfa(v, cnfa, cm, &v->dfa2);
360 if (ISERR()) {
361 assert(d == NULL);
362 freedfa(s);
363 return v->err;
364 }
365
366 ret = cfindloop(v, cnfa, cm, d, s, &cold);
367
368 freedfa(d);
369 freedfa(s);
370 NOERR();
371 if (v->g->cflags&REG_EXPECT) {
372 assert(v->details != NULL);
373 if (cold != NULL)
374 v->details->rm_extend.rm_so = OFF(cold);
375 else
376 v->details->rm_extend.rm_so = OFF(v->stop);
377 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
378 }
379 return ret;
380 }
381
382 /*
383 - cfindloop - the heart of cfind
384 ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *,
385 ^ struct dfa *, struct dfa *, chr **);
386 */
387 static int
388 cfindloop(v, cnfa, cm, d, s, coldp)
389 struct vars *v;
390 struct cnfa *cnfa;
391 struct colormap *cm;
392 struct dfa *d;
393 struct dfa *s;
394 chr **coldp; /* where to put coldstart pointer */
395 {
396 chr *begin;
397 chr *end;
398 chr *cold;
399 chr *open; /* open and close of range of possible starts */
400 chr *close;
401 chr *estart;
402 chr *estop;
403 int er;
404 int shorter = v->g->tree->flags&SHORTER;
405 int hitend;
406
407 assert(d != NULL && s != NULL);
408 cold = NULL;
409 close = v->start;
410 do {
411 MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
412 close = shortest(v, s, close, close, v->stop, &cold, (int *)NULL);
413 if (close == NULL)
414 break; /* NOTE BREAK */
415 assert(cold != NULL);
416 open = cold;
417 cold = NULL;
418 MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
419 for (begin = open; begin <= close; begin++) {
420 MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
421 estart = begin;
422 estop = v->stop;
423 for (;;) {
424 if (shorter)
425 end = shortest(v, d, begin, estart,
426 estop, (chr **)NULL, &hitend);
427 else
428 end = longest(v, d, begin, estop,
429 &hitend);
430 if (hitend && cold == NULL)
431 cold = begin;
432 if (end == NULL)
433 break; /* NOTE BREAK OUT */
434 MDEBUG(("tentative end %ld\n", LOFF(end)));
435 zapsubs(v->pmatch, v->nmatch);
436 zapmem(v, v->g->tree);
437 er = cdissect(v, v->g->tree, begin, end);
438 if (er == REG_OKAY) {
439 if (v->nmatch > 0) {
440 v->pmatch[0].rm_so = OFF(begin);
441 v->pmatch[0].rm_eo = OFF(end);
442 }
443 *coldp = cold;
444 return REG_OKAY;
445 }
446 if (er != REG_NOMATCH) {
447 ERR(er);
448 return er;
449 }
450 if ((shorter) ? end == estop : end == begin) {
451 /* no point in trying again */
452 *coldp = cold;
453 return REG_NOMATCH;
454 }
455 /* go around and try again */
456 if (shorter)
457 estart = end + 1;
458 else
459 estop = end - 1;
460 }
461 }
462 } while (close < v->stop);
463
464 *coldp = cold;
465 return REG_NOMATCH;
466 }
467
468 /*
469 - zapsubs - initialize the subexpression matches to "no match"
470 ^ static VOID zapsubs(regmatch_t *, size_t);
471 */
472 static VOID
473 zapsubs(p, n)
474 regmatch_t *p;
475 size_t n;
476 {
477 size_t i;
478
479 for (i = n-1; i > 0; i--) {
480 p[i].rm_so = -1;
481 p[i].rm_eo = -1;
482 }
483 }
484
485 /*
486 - zapmem - initialize the retry memory of a subtree to zeros
487 ^ static VOID zapmem(struct vars *, struct subre *);
488 */
489 static VOID
490 zapmem(v, t)
491 struct vars *v;
492 struct subre *t;
493 {
494 if (t == NULL)
495 return;
496
497 assert(v->mem != NULL);
498 v->mem[t->retry] = 0;
499 if (t->op == '(') {
500 assert(t->subno > 0);
501 v->pmatch[t->subno].rm_so = -1;
502 v->pmatch[t->subno].rm_eo = -1;
503 }
504
505 if (t->left != NULL)
506 zapmem(v, t->left);
507 if (t->right != NULL)
508 zapmem(v, t->right);
509 }
510
511 /*
512 - subset - set any subexpression relevant to a successful subre
513 ^ static VOID subset(struct vars *, struct subre *, chr *, chr *);
514 */
515 static VOID
516 subset(v, sub, begin, end)
517 struct vars *v;
518 struct subre *sub;
519 chr *begin;
520 chr *end;
521 {
522 int n = sub->subno;
523
524 assert(n > 0);
525 if ((size_t)n >= v->nmatch)
526 return;
527
528 MDEBUG(("setting %d\n", n));
529 v->pmatch[n].rm_so = OFF(begin);
530 v->pmatch[n].rm_eo = OFF(end);
531 }
532
533 /*
534 - dissect - determine subexpression matches (uncomplicated case)
535 ^ static int dissect(struct vars *, struct subre *, chr *, chr *);
536 */
537 static int /* regexec return code */
538 dissect(v, t, begin, end)
539 struct vars *v;
540 struct subre *t;
541 chr *begin; /* beginning of relevant substring */
542 chr *end; /* end of same */
543 {
544 assert(t != NULL);
545 MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));
546
547 switch (t->op) {
548 case '=': /* terminal node */
549 assert(t->left == NULL && t->right == NULL);
550 return REG_OKAY; /* no action, parent did the work */
551 break;
552 case '|': /* alternation */
553 assert(t->left != NULL);
554 return altdissect(v, t, begin, end);
555 break;
556 case 'b': /* back ref -- shouldn't be calling us! */
557 return REG_ASSERT;
558 break;
559 case '.': /* concatenation */
560 assert(t->left != NULL && t->right != NULL);
561 return condissect(v, t, begin, end);
562 break;
563 case '(': /* capturing */
564 assert(t->left != NULL && t->right == NULL);
565 assert(t->subno > 0);
566 subset(v, t, begin, end);
567 return dissect(v, t->left, begin, end);
568 break;
569 default:
570 return REG_ASSERT;
571 break;
572 }
573 }
574
575 /*
576 - condissect - determine concatenation subexpression matches (uncomplicated)
577 ^ static int condissect(struct vars *, struct subre *, chr *, chr *);
578 */
579 static int /* regexec return code */
580 condissect(v, t, begin, end)
581 struct vars *v;
582 struct subre *t;
583 chr *begin; /* beginning of relevant substring */
584 chr *end; /* end of same */
585 {
586 struct dfa *d;
587 struct dfa *d2;
588 chr *mid;
589 int i;
590 int shorter = (t->left->flags&SHORTER) ? 1 : 0;
591 chr *stop = (shorter) ? end : begin;
592
593 assert(t->op == '.');
594 assert(t->left != NULL && t->left->cnfa.nstates > 0);
595 assert(t->right != NULL && t->right->cnfa.nstates > 0);
596
597 d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
598 NOERR();
599 d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);
600 if (ISERR()) {
601 assert(d2 == NULL);
602 freedfa(d);
603 return v->err;
604 }
605
606 /* pick a tentative midpoint */
607 if (shorter)
608 mid = shortest(v, d, begin, begin, end, (chr **)NULL,
609 (int *)NULL);
610 else
611 mid = longest(v, d, begin, end, (int *)NULL);
612 if (mid == NULL) {
613 freedfa(d);
614 freedfa(d2);
615 return REG_ASSERT;
616 }
617 MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
618
619 /* iterate until satisfaction or failure */
620 while (longest(v, d2, mid, end, (int *)NULL) != end) {
621 /* that midpoint didn't work, find a new one */
622 if (mid == stop) {
623 /* all possibilities exhausted! */
624 MDEBUG(("no midpoint!\n"));
625 freedfa(d);
626 freedfa(d2);
627 return REG_ASSERT;
628 }
629 if (shorter)
630 mid = shortest(v, d, begin, mid+1, end, (chr **)NULL,
631 (int *)NULL);
632 else
633 mid = longest(v, d, begin, mid-1, (int *)NULL);
634 if (mid == NULL) {
635 /* failed to find a new one! */
636 MDEBUG(("failed midpoint!\n"));
637 freedfa(d);
638 freedfa(d2);
639 return REG_ASSERT;
640 }
641 MDEBUG(("new midpoint %ld\n", LOFF(mid)));
642 }
643
644 /* satisfaction */
645 MDEBUG(("successful\n"));
646 freedfa(d);
647 freedfa(d2);
648 i = dissect(v, t->left, begin, mid);
649 if (i != REG_OKAY)
650 return i;
651 return dissect(v, t->right, mid, end);
652 }
653
654 /*
655 - altdissect - determine alternative subexpression matches (uncomplicated)
656 ^ static int altdissect(struct vars *, struct subre *, chr *, chr *);
657 */
658 static int /* regexec return code */
659 altdissect(v, t, begin, end)
660 struct vars *v;
661 struct subre *t;
662 chr *begin; /* beginning of relevant substring */
663 chr *end; /* end of same */
664 {
665 struct dfa *d;
666 int i;
667
668 assert(t != NULL);
669 assert(t->op == '|');
670
671 for (i = 0; t != NULL; t = t->right, i++) {
672 MDEBUG(("trying %dth\n", i));
673 assert(t->left != NULL && t->left->cnfa.nstates > 0);
674 d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
675 if (ISERR())
676 return v->err;
677 if (longest(v, d, begin, end, (int *)NULL) == end) {
678 MDEBUG(("success\n"));
679 freedfa(d);
680 return dissect(v, t->left, begin, end);
681 }
682 freedfa(d);
683 }
684 return REG_ASSERT; /* none of them matched?!? */
685 }
686
687 /*
688 - cdissect - determine subexpression matches (with complications)
689 * The retry memory stores the offset of the trial midpoint from begin,
690 * plus 1 so that 0 uniquely means "clean slate".
691 ^ static int cdissect(struct vars *, struct subre *, chr *, chr *);
692 */
693 static int /* regexec return code */
694 cdissect(v, t, begin, end)
695 struct vars *v;
696 struct subre *t;
697 chr *begin; /* beginning of relevant substring */
698 chr *end; /* end of same */
699 {
700 int er;
701
702 assert(t != NULL);
703 MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
704
705 switch (t->op) {
706 case '=': /* terminal node */
707 assert(t->left == NULL && t->right == NULL);
708 return REG_OKAY; /* no action, parent did the work */
709 break;
710 case '|': /* alternation */
711 assert(t->left != NULL);
712 return caltdissect(v, t, begin, end);
713 break;
714 case 'b': /* back ref -- shouldn't be calling us! */
715 assert(t->left == NULL && t->right == NULL);
716 return cbrdissect(v, t, begin, end);
717 break;
718 case '.': /* concatenation */
719 assert(t->left != NULL && t->right != NULL);
720 return ccondissect(v, t, begin, end);
721 break;
722 case '(': /* capturing */
723 assert(t->left != NULL && t->right == NULL);
724 assert(t->subno > 0);
725 er = cdissect(v, t->left, begin, end);
726 if (er == REG_OKAY)
727 subset(v, t, begin, end);
728 return er;
729 break;
730 default:
731 return REG_ASSERT;
732 break;
733 }
734 }
735
736 /*
737 - ccondissect - concatenation subexpression matches (with complications)
738 * The retry memory stores the offset of the trial midpoint from begin,
739 * plus 1 so that 0 uniquely means "clean slate".
740 ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *);
741 */
742 static int /* regexec return code */
743 ccondissect(v, t, begin, end)
744 struct vars *v;
745 struct subre *t;
746 chr *begin; /* beginning of relevant substring */
747 chr *end; /* end of same */
748 {
749 struct dfa *d;
750 struct dfa *d2;
751 chr *mid;
752 int er;
753
754 assert(t->op == '.');
755 assert(t->left != NULL && t->left->cnfa.nstates > 0);
756 assert(t->right != NULL && t->right->cnfa.nstates > 0);
757
758 if (t->left->flags&SHORTER) /* reverse scan */
759 return crevdissect(v, t, begin, end);
760
761 d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
762 if (ISERR())
763 return v->err;
764 d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
765 if (ISERR()) {
766 freedfa(d);
767 return v->err;
768 }
769 MDEBUG(("cconcat %d\n", t->retry));
770
771 /* pick a tentative midpoint */
772 if (v->mem[t->retry] == 0) {
773 mid = longest(v, d, begin, end, (int *)NULL);
774 if (mid == NULL) {
775 freedfa(d);
776 freedfa(d2);
777 return REG_NOMATCH;
778 }
779 MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
780 v->mem[t->retry] = (mid - begin) + 1;
781 } else {
782 mid = begin + (v->mem[t->retry] - 1);
783 MDEBUG(("working midpoint %ld\n", LOFF(mid)));
784 }
785
786 /* iterate until satisfaction or failure */
787 for (;;) {
788 /* try this midpoint on for size */
789 er = cdissect(v, t->left, begin, mid);
790 if (er == REG_OKAY &&
791 longest(v, d2, mid, end, (int *)NULL) == end &&
792 (er = cdissect(v, t->right, mid, end)) ==
793 REG_OKAY)
794 break; /* NOTE BREAK OUT */
795 if (er != REG_OKAY && er != REG_NOMATCH) {
796 freedfa(d);
797 freedfa(d2);
798 return er;
799 }
800
801 /* that midpoint didn't work, find a new one */
802 if (mid == begin) {
803 /* all possibilities exhausted */
804 MDEBUG(("%d no midpoint\n", t->retry));
805 freedfa(d);
806 freedfa(d2);
807 return REG_NOMATCH;
808 }
809 mid = longest(v, d, begin, mid-1, (int *)NULL);
810 if (mid == NULL) {
811 /* failed to find a new one */
812 MDEBUG(("%d failed midpoint\n", t->retry));
813 freedfa(d);
814 freedfa(d2);
815 return REG_NOMATCH;
816 }
817 MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
818 v->mem[t->retry] = (mid - begin) + 1;
819 zapmem(v, t->left);
820 zapmem(v, t->right);
821 }
822
823 /* satisfaction */
824 MDEBUG(("successful\n"));
825 freedfa(d);
826 freedfa(d2);
827 return REG_OKAY;
828 }
829
830 /*
831 - crevdissect - determine backref shortest-first subexpression matches
832 * The retry memory stores the offset of the trial midpoint from begin,
833 * plus 1 so that 0 uniquely means "clean slate".
834 ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *);
835 */
836 static int /* regexec return code */
837 crevdissect(v, t, begin, end)
838 struct vars *v;
839 struct subre *t;
840 chr *begin; /* beginning of relevant substring */
841 chr *end; /* end of same */
842 {
843 struct dfa *d;
844 struct dfa *d2;
845 chr *mid;
846 int er;
847
848 assert(t->op == '.');
849 assert(t->left != NULL && t->left->cnfa.nstates > 0);
850 assert(t->right != NULL && t->right->cnfa.nstates > 0);
851 assert(t->left->flags&SHORTER);
852
853 /* concatenation -- need to split the substring between parts */
854 d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
855 if (ISERR())
856 return v->err;
857 d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
858 if (ISERR()) {
859 freedfa(d);
860 return v->err;
861 }
862 MDEBUG(("crev %d\n", t->retry));
863
864 /* pick a tentative midpoint */
865 if (v->mem[t->retry] == 0) {
866 mid = shortest(v, d, begin, begin, end, (chr **)NULL, (int *)NULL);
867 if (mid == NULL) {
868 freedfa(d);
869 freedfa(d2);
870 return REG_NOMATCH;
871 }
872 MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
873 v->mem[t->retry] = (mid - begin) + 1;
874 } else {
875 mid = begin + (v->mem[t->retry] - 1);
876 MDEBUG(("working midpoint %ld\n", LOFF(mid)));
877 }
878
879 /* iterate until satisfaction or failure */
880 for (;;) {
881 /* try this midpoint on for size */
882 er = cdissect(v, t->left, begin, mid);
883 if (er == REG_OKAY &&
884 longest(v, d2, mid, end, (int *)NULL) == end &&
885 (er = cdissect(v, t->right, mid, end)) ==
886 REG_OKAY)
887 break; /* NOTE BREAK OUT */
888 if (er != REG_OKAY && er != REG_NOMATCH) {
889 freedfa(d);
890 freedfa(d2);
891 return er;
892 }
893
894 /* that midpoint didn't work, find a new one */
895 if (mid == end) {
896 /* all possibilities exhausted */
897 MDEBUG(("%d no midpoint\n", t->retry));
898 freedfa(d);
899 freedfa(d2);
900 return REG_NOMATCH;
901 }
902 mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, (int *)NULL);
903 if (mid == NULL) {
904 /* failed to find a new one */
905 MDEBUG(("%d failed midpoint\n", t->retry));
906 freedfa(d);
907 freedfa(d2);
908 return REG_NOMATCH;
909 }
910 MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
911 v->mem[t->retry] = (mid - begin) + 1;
912 zapmem(v, t->left);
913 zapmem(v, t->right);
914 }
915
916 /* satisfaction */
917 MDEBUG(("successful\n"));
918 freedfa(d);
919 freedfa(d2);
920 return REG_OKAY;
921 }
922
923 /*
924 - cbrdissect - determine backref subexpression matches
925 ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
926 */
927 static int /* regexec return code */
928 cbrdissect(v, t, begin, end)
929 struct vars *v;
930 struct subre *t;
931 chr *begin; /* beginning of relevant substring */
932 chr *end; /* end of same */
933 {
934 int i;
935 int n = t->subno;
936 size_t len;
937 chr *paren;
938 chr *p;
939 chr *stop;
940 int min = t->min;
941 int max = t->max;
942
943 assert(t != NULL);
944 assert(t->op == 'b');
945 assert(n >= 0);
946 assert((size_t)n < v->nmatch);
947
948 MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
949
950 if (v->pmatch[n].rm_so == -1)
951 return REG_NOMATCH;
952 paren = v->start + v->pmatch[n].rm_so;
953 len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
954
955 /* no room to maneuver -- retries are pointless */
956 if (v->mem[t->retry])
957 return REG_NOMATCH;
958 v->mem[t->retry] = 1;
959
960 /* special-case zero-length string */
961 if (len == 0) {
962 if (begin == end)
963 return REG_OKAY;
964 return REG_NOMATCH;
965 }
966
967 /* and too-short string */
968 assert(end >= begin);
969 if ((size_t)(end - begin) < len)
970 return REG_NOMATCH;
971 stop = end - len;
972
973 /* count occurrences */
974 i = 0;
975 for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) {
976 if ((*v->g->compare)(paren, p, len) != 0)
977 break;
978 i++;
979 }
980 MDEBUG(("cbackref found %d\n", i));
981
982 /* and sort it out */
983 if (p != end) /* didn't consume all of it */
984 return REG_NOMATCH;
985 if (min <= i && (i <= max || max == INFINITY))
986 return REG_OKAY;
987 return REG_NOMATCH; /* out of range */
988 }
989
990 /*
991 - caltdissect - determine alternative subexpression matches (w. complications)
992 ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *);
993 */
994 static int /* regexec return code */
995 caltdissect(v, t, begin, end)
996 struct vars *v;
997 struct subre *t;
998 chr *begin; /* beginning of relevant substring */
999 chr *end; /* end of same */
1000 {
1001 struct dfa *d;
1002 int er;
1003 # define UNTRIED 0 /* not yet tried at all */
1004 # define TRYING 1 /* top matched, trying submatches */
1005 # define TRIED 2 /* top didn't match or submatches exhausted */
1006
1007 if (t == NULL)
1008 return REG_NOMATCH;
1009 assert(t->op == '|');
1010 if (v->mem[t->retry] == TRIED)
1011 return caltdissect(v, t->right, begin, end);
1012
1013 MDEBUG(("calt n%d\n", t->retry));
1014 assert(t->left != NULL);
1015
1016 if (v->mem[t->retry] == UNTRIED) {
1017 d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
1018 if (ISERR())
1019 return v->err;
1020 if (longest(v, d, begin, end, (int *)NULL) != end) {
1021 freedfa(d);
1022 v->mem[t->retry] = TRIED;
1023 return caltdissect(v, t->right, begin, end);
1024 }
1025 freedfa(d);
1026 MDEBUG(("calt matched\n"));
1027 v->mem[t->retry] = TRYING;
1028 }
1029
1030 er = cdissect(v, t->left, begin, end);
1031 if (er != REG_NOMATCH)
1032 return er;
1033
1034 v->mem[t->retry] = TRIED;
1035 return caltdissect(v, t->right, begin, end);
1036 }
1037
1038
1039
1040 #include "rege_dfa.c"
1041
1042 /* $History: regexec.c $
1043 *
1044 * ***************** Version 1 *****************
1045 * User: Dtashley Date: 1/02/01 Time: 12:11a
1046 * Created in $/IjuScripter, IjuConsole/Source/Tcl Base
1047 * Initial check-in.
1048 */
1049
1050 /* End of REGEXEC.C */

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25