1 |
/* $Header$ */ |
2 |
/* |
3 |
* NFA utilities. |
4 |
* This file is #included by regcomp.c. |
5 |
* |
6 |
* Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. |
7 |
* |
8 |
* Development of this software was funded, in part, by Cray Research Inc., |
9 |
* UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics |
10 |
* Corporation, none of whom are responsible for the results. The author |
11 |
* thanks all of them. |
12 |
* |
13 |
* Redistribution and use in source and binary forms -- with or without |
14 |
* modification -- are permitted for any purpose, provided that |
15 |
* redistributions in source form retain this entire copyright notice and |
16 |
* indicate the origin and nature of any modifications. |
17 |
* |
18 |
* I'd appreciate being given credit for this package in the documentation |
19 |
* of software which uses it, but that is not a requirement. |
20 |
* |
21 |
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, |
22 |
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY |
23 |
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL |
24 |
* HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
25 |
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
26 |
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; |
27 |
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
28 |
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
29 |
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
30 |
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
31 |
* |
32 |
* |
33 |
* |
34 |
* One or two things that technically ought to be in here |
35 |
* are actually in color.c, thanks to some incestuous relationships in |
36 |
* the color chains. |
37 |
*/ |
38 |
|
39 |
#define NISERR() VISERR(nfa->v) |
40 |
#define NERR(e) VERR(nfa->v, (e)) |
41 |
|
42 |
|
43 |
/* |
44 |
- newnfa - set up an NFA |
45 |
^ static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *); |
46 |
*/ |
47 |
static struct nfa * /* the NFA, or NULL */ |
48 |
newnfa(v, cm, parent) |
49 |
struct vars *v; |
50 |
struct colormap *cm; |
51 |
struct nfa *parent; /* NULL if primary NFA */ |
52 |
{ |
53 |
struct nfa *nfa; |
54 |
|
55 |
nfa = (struct nfa *)MALLOC(sizeof(struct nfa)); |
56 |
if (nfa == NULL) |
57 |
return NULL; |
58 |
|
59 |
nfa->states = NULL; |
60 |
nfa->slast = NULL; |
61 |
nfa->free = NULL; |
62 |
nfa->nstates = 0; |
63 |
nfa->cm = cm; |
64 |
nfa->v = v; |
65 |
nfa->bos[0] = nfa->bos[1] = COLORLESS; |
66 |
nfa->eos[0] = nfa->eos[1] = COLORLESS; |
67 |
nfa->post = newfstate(nfa, '@'); /* number 0 */ |
68 |
nfa->pre = newfstate(nfa, '>'); /* number 1 */ |
69 |
nfa->parent = parent; |
70 |
|
71 |
nfa->init = newstate(nfa); /* may become invalid later */ |
72 |
nfa->final = newstate(nfa); |
73 |
if (ISERR()) { |
74 |
freenfa(nfa); |
75 |
return NULL; |
76 |
} |
77 |
rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init); |
78 |
newarc(nfa, '^', 1, nfa->pre, nfa->init); |
79 |
newarc(nfa, '^', 0, nfa->pre, nfa->init); |
80 |
rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post); |
81 |
newarc(nfa, '$', 1, nfa->final, nfa->post); |
82 |
newarc(nfa, '$', 0, nfa->final, nfa->post); |
83 |
|
84 |
if (ISERR()) { |
85 |
freenfa(nfa); |
86 |
return NULL; |
87 |
} |
88 |
return nfa; |
89 |
} |
90 |
|
91 |
/* |
92 |
- freenfa - free an entire NFA |
93 |
^ static VOID freenfa(struct nfa *); |
94 |
*/ |
95 |
static VOID |
96 |
freenfa(nfa) |
97 |
struct nfa *nfa; |
98 |
{ |
99 |
struct state *s; |
100 |
|
101 |
while ((s = nfa->states) != NULL) { |
102 |
s->nins = s->nouts = 0; /* don't worry about arcs */ |
103 |
freestate(nfa, s); |
104 |
} |
105 |
while ((s = nfa->free) != NULL) { |
106 |
nfa->free = s->next; |
107 |
destroystate(nfa, s); |
108 |
} |
109 |
|
110 |
nfa->slast = NULL; |
111 |
nfa->nstates = -1; |
112 |
nfa->pre = NULL; |
113 |
nfa->post = NULL; |
114 |
FREE(nfa); |
115 |
} |
116 |
|
117 |
/* |
118 |
- newstate - allocate an NFA state, with zero flag value |
119 |
^ static struct state *newstate(struct nfa *); |
120 |
*/ |
121 |
static struct state * /* NULL on error */ |
122 |
newstate(nfa) |
123 |
struct nfa *nfa; |
124 |
{ |
125 |
struct state *s; |
126 |
|
127 |
if (nfa->free != NULL) { |
128 |
s = nfa->free; |
129 |
nfa->free = s->next; |
130 |
} else { |
131 |
s = (struct state *)MALLOC(sizeof(struct state)); |
132 |
if (s == NULL) { |
133 |
NERR(REG_ESPACE); |
134 |
return NULL; |
135 |
} |
136 |
s->oas.next = NULL; |
137 |
s->free = NULL; |
138 |
s->noas = 0; |
139 |
} |
140 |
|
141 |
assert(nfa->nstates >= 0); |
142 |
s->no = nfa->nstates++; |
143 |
s->flag = 0; |
144 |
if (nfa->states == NULL) |
145 |
nfa->states = s; |
146 |
s->nins = 0; |
147 |
s->ins = NULL; |
148 |
s->nouts = 0; |
149 |
s->outs = NULL; |
150 |
s->tmp = NULL; |
151 |
s->next = NULL; |
152 |
if (nfa->slast != NULL) { |
153 |
assert(nfa->slast->next == NULL); |
154 |
nfa->slast->next = s; |
155 |
} |
156 |
s->prev = nfa->slast; |
157 |
nfa->slast = s; |
158 |
return s; |
159 |
} |
160 |
|
161 |
/* |
162 |
- newfstate - allocate an NFA state with a specified flag value |
163 |
^ static struct state *newfstate(struct nfa *, int flag); |
164 |
*/ |
165 |
static struct state * /* NULL on error */ |
166 |
newfstate(nfa, flag) |
167 |
struct nfa *nfa; |
168 |
int flag; |
169 |
{ |
170 |
struct state *s; |
171 |
|
172 |
s = newstate(nfa); |
173 |
if (s != NULL) |
174 |
s->flag = (char)flag; |
175 |
return s; |
176 |
} |
177 |
|
178 |
/* |
179 |
- dropstate - delete a state's inarcs and outarcs and free it |
180 |
^ static VOID dropstate(struct nfa *, struct state *); |
181 |
*/ |
182 |
static VOID |
183 |
dropstate(nfa, s) |
184 |
struct nfa *nfa; |
185 |
struct state *s; |
186 |
{ |
187 |
struct arc *a; |
188 |
|
189 |
while ((a = s->ins) != NULL) |
190 |
freearc(nfa, a); |
191 |
while ((a = s->outs) != NULL) |
192 |
freearc(nfa, a); |
193 |
freestate(nfa, s); |
194 |
} |
195 |
|
196 |
/* |
197 |
- freestate - free a state, which has no in-arcs or out-arcs |
198 |
^ static VOID freestate(struct nfa *, struct state *); |
199 |
*/ |
200 |
static VOID |
201 |
freestate(nfa, s) |
202 |
struct nfa *nfa; |
203 |
struct state *s; |
204 |
{ |
205 |
assert(s != NULL); |
206 |
assert(s->nins == 0 && s->nouts == 0); |
207 |
|
208 |
s->no = FREESTATE; |
209 |
s->flag = 0; |
210 |
if (s->next != NULL) |
211 |
s->next->prev = s->prev; |
212 |
else { |
213 |
assert(s == nfa->slast); |
214 |
nfa->slast = s->prev; |
215 |
} |
216 |
if (s->prev != NULL) |
217 |
s->prev->next = s->next; |
218 |
else { |
219 |
assert(s == nfa->states); |
220 |
nfa->states = s->next; |
221 |
} |
222 |
s->prev = NULL; |
223 |
s->next = nfa->free; /* don't delete it, put it on the free list */ |
224 |
nfa->free = s; |
225 |
} |
226 |
|
227 |
/* |
228 |
- destroystate - really get rid of an already-freed state |
229 |
^ static VOID destroystate(struct nfa *, struct state *); |
230 |
*/ |
231 |
static VOID |
232 |
destroystate(nfa, s) |
233 |
struct nfa *nfa; |
234 |
struct state *s; |
235 |
{ |
236 |
struct arcbatch *ab; |
237 |
struct arcbatch *abnext; |
238 |
|
239 |
assert(s->no == FREESTATE); |
240 |
for (ab = s->oas.next; ab != NULL; ab = abnext) { |
241 |
abnext = ab->next; |
242 |
FREE(ab); |
243 |
} |
244 |
s->ins = NULL; |
245 |
s->outs = NULL; |
246 |
s->next = NULL; |
247 |
FREE(s); |
248 |
} |
249 |
|
250 |
/* |
251 |
- newarc - set up a new arc within an NFA |
252 |
^ static VOID newarc(struct nfa *, int, pcolor, struct state *, |
253 |
^ struct state *); |
254 |
*/ |
255 |
static VOID |
256 |
newarc(nfa, t, co, from, to) |
257 |
struct nfa *nfa; |
258 |
int t; |
259 |
pcolor co; |
260 |
struct state *from; |
261 |
struct state *to; |
262 |
{ |
263 |
struct arc *a; |
264 |
|
265 |
assert(from != NULL && to != NULL); |
266 |
|
267 |
/* check for duplicates */ |
268 |
for (a = from->outs; a != NULL; a = a->outchain) |
269 |
if (a->to == to && a->co == co && a->type == t) |
270 |
return; |
271 |
|
272 |
a = allocarc(nfa, from); |
273 |
if (NISERR()) |
274 |
return; |
275 |
assert(a != NULL); |
276 |
|
277 |
a->type = t; |
278 |
a->co = (color)co; |
279 |
a->to = to; |
280 |
a->from = from; |
281 |
|
282 |
/* |
283 |
* Put the new arc on the beginning, not the end, of the chains. |
284 |
* Not only is this easier, it has the very useful side effect that |
285 |
* deleting the most-recently-added arc is the cheapest case rather |
286 |
* than the most expensive one. |
287 |
*/ |
288 |
a->inchain = to->ins; |
289 |
to->ins = a; |
290 |
a->outchain = from->outs; |
291 |
from->outs = a; |
292 |
|
293 |
from->nouts++; |
294 |
to->nins++; |
295 |
|
296 |
if (COLORED(a) && nfa->parent == NULL) |
297 |
colorchain(nfa->cm, a); |
298 |
|
299 |
return; |
300 |
} |
301 |
|
302 |
/* |
303 |
- allocarc - allocate a new out-arc within a state |
304 |
^ static struct arc *allocarc(struct nfa *, struct state *); |
305 |
*/ |
306 |
static struct arc * /* NULL for failure */ |
307 |
allocarc(nfa, s) |
308 |
struct nfa *nfa; |
309 |
struct state *s; |
310 |
{ |
311 |
struct arc *a; |
312 |
struct arcbatch *new; |
313 |
int i; |
314 |
|
315 |
/* shortcut */ |
316 |
if (s->free == NULL && s->noas < ABSIZE) { |
317 |
a = &s->oas.a[s->noas]; |
318 |
s->noas++; |
319 |
return a; |
320 |
} |
321 |
|
322 |
/* if none at hand, get more */ |
323 |
if (s->free == NULL) { |
324 |
new = (struct arcbatch *)MALLOC(sizeof(struct arcbatch)); |
325 |
if (new == NULL) { |
326 |
NERR(REG_ESPACE); |
327 |
return NULL; |
328 |
} |
329 |
new->next = s->oas.next; |
330 |
s->oas.next = new; |
331 |
|
332 |
for (i = 0; i < ABSIZE; i++) { |
333 |
new->a[i].type = 0; |
334 |
new->a[i].freechain = &new->a[i+1]; |
335 |
} |
336 |
new->a[ABSIZE-1].freechain = NULL; |
337 |
s->free = &new->a[0]; |
338 |
} |
339 |
assert(s->free != NULL); |
340 |
|
341 |
a = s->free; |
342 |
s->free = a->freechain; |
343 |
return a; |
344 |
} |
345 |
|
346 |
/* |
347 |
- freearc - free an arc |
348 |
^ static VOID freearc(struct nfa *, struct arc *); |
349 |
*/ |
350 |
static VOID |
351 |
freearc(nfa, victim) |
352 |
struct nfa *nfa; |
353 |
struct arc *victim; |
354 |
{ |
355 |
struct state *from = victim->from; |
356 |
struct state *to = victim->to; |
357 |
struct arc *a; |
358 |
|
359 |
assert(victim->type != 0); |
360 |
|
361 |
/* take it off color chain if necessary */ |
362 |
if (COLORED(victim) && nfa->parent == NULL) |
363 |
uncolorchain(nfa->cm, victim); |
364 |
|
365 |
/* take it off source's out-chain */ |
366 |
assert(from != NULL); |
367 |
assert(from->outs != NULL); |
368 |
a = from->outs; |
369 |
if (a == victim) /* simple case: first in chain */ |
370 |
from->outs = victim->outchain; |
371 |
else { |
372 |
for (; a != NULL && a->outchain != victim; a = a->outchain) |
373 |
continue; |
374 |
assert(a != NULL); |
375 |
a->outchain = victim->outchain; |
376 |
} |
377 |
from->nouts--; |
378 |
|
379 |
/* take it off target's in-chain */ |
380 |
assert(to != NULL); |
381 |
assert(to->ins != NULL); |
382 |
a = to->ins; |
383 |
if (a == victim) /* simple case: first in chain */ |
384 |
to->ins = victim->inchain; |
385 |
else { |
386 |
for (; a != NULL && a->inchain != victim; a = a->inchain) |
387 |
continue; |
388 |
assert(a != NULL); |
389 |
a->inchain = victim->inchain; |
390 |
} |
391 |
to->nins--; |
392 |
|
393 |
/* clean up and place on free list */ |
394 |
victim->type = 0; |
395 |
victim->from = NULL; /* precautions... */ |
396 |
victim->to = NULL; |
397 |
victim->inchain = NULL; |
398 |
victim->outchain = NULL; |
399 |
victim->freechain = from->free; |
400 |
from->free = victim; |
401 |
} |
402 |
|
403 |
/* |
404 |
- findarc - find arc, if any, from given source with given type and color |
405 |
* If there is more than one such arc, the result is random. |
406 |
^ static struct arc *findarc(struct state *, int, pcolor); |
407 |
*/ |
408 |
static struct arc * |
409 |
findarc(s, type, co) |
410 |
struct state *s; |
411 |
int type; |
412 |
pcolor co; |
413 |
{ |
414 |
struct arc *a; |
415 |
|
416 |
for (a = s->outs; a != NULL; a = a->outchain) |
417 |
if (a->type == type && a->co == co) |
418 |
return a; |
419 |
return NULL; |
420 |
} |
421 |
|
422 |
/* |
423 |
- cparc - allocate a new arc within an NFA, copying details from old one |
424 |
^ static VOID cparc(struct nfa *, struct arc *, struct state *, |
425 |
^ struct state *); |
426 |
*/ |
427 |
static VOID |
428 |
cparc(nfa, oa, from, to) |
429 |
struct nfa *nfa; |
430 |
struct arc *oa; |
431 |
struct state *from; |
432 |
struct state *to; |
433 |
{ |
434 |
newarc(nfa, oa->type, oa->co, from, to); |
435 |
} |
436 |
|
437 |
/* |
438 |
- moveins - move all in arcs of a state to another state |
439 |
* You might think this could be done better by just updating the |
440 |
* existing arcs, and you would be right if it weren't for the desire |
441 |
* for duplicate suppression, which makes it easier to just make new |
442 |
* ones to exploit the suppression built into newarc. |
443 |
^ static VOID moveins(struct nfa *, struct state *, struct state *); |
444 |
*/ |
445 |
static VOID |
446 |
moveins(nfa, old, new) |
447 |
struct nfa *nfa; |
448 |
struct state *old; |
449 |
struct state *new; |
450 |
{ |
451 |
struct arc *a; |
452 |
|
453 |
assert(old != new); |
454 |
|
455 |
while ((a = old->ins) != NULL) { |
456 |
cparc(nfa, a, a->from, new); |
457 |
freearc(nfa, a); |
458 |
} |
459 |
assert(old->nins == 0); |
460 |
assert(old->ins == NULL); |
461 |
} |
462 |
|
463 |
/* |
464 |
- copyins - copy all in arcs of a state to another state |
465 |
^ static VOID copyins(struct nfa *, struct state *, struct state *); |
466 |
*/ |
467 |
static VOID |
468 |
copyins(nfa, old, new) |
469 |
struct nfa *nfa; |
470 |
struct state *old; |
471 |
struct state *new; |
472 |
{ |
473 |
struct arc *a; |
474 |
|
475 |
assert(old != new); |
476 |
|
477 |
for (a = old->ins; a != NULL; a = a->inchain) |
478 |
cparc(nfa, a, a->from, new); |
479 |
} |
480 |
|
481 |
/* |
482 |
- moveouts - move all out arcs of a state to another state |
483 |
^ static VOID moveouts(struct nfa *, struct state *, struct state *); |
484 |
*/ |
485 |
static VOID |
486 |
moveouts(nfa, old, new) |
487 |
struct nfa *nfa; |
488 |
struct state *old; |
489 |
struct state *new; |
490 |
{ |
491 |
struct arc *a; |
492 |
|
493 |
assert(old != new); |
494 |
|
495 |
while ((a = old->outs) != NULL) { |
496 |
cparc(nfa, a, new, a->to); |
497 |
freearc(nfa, a); |
498 |
} |
499 |
} |
500 |
|
501 |
/* |
502 |
- copyouts - copy all out arcs of a state to another state |
503 |
^ static VOID copyouts(struct nfa *, struct state *, struct state *); |
504 |
*/ |
505 |
static VOID |
506 |
copyouts(nfa, old, new) |
507 |
struct nfa *nfa; |
508 |
struct state *old; |
509 |
struct state *new; |
510 |
{ |
511 |
struct arc *a; |
512 |
|
513 |
assert(old != new); |
514 |
|
515 |
for (a = old->outs; a != NULL; a = a->outchain) |
516 |
cparc(nfa, a, new, a->to); |
517 |
} |
518 |
|
519 |
/* |
520 |
- cloneouts - copy out arcs of a state to another state pair, modifying type |
521 |
^ static VOID cloneouts(struct nfa *, struct state *, struct state *, |
522 |
^ struct state *, int); |
523 |
*/ |
524 |
static VOID |
525 |
cloneouts(nfa, old, from, to, type) |
526 |
struct nfa *nfa; |
527 |
struct state *old; |
528 |
struct state *from; |
529 |
struct state *to; |
530 |
int type; |
531 |
{ |
532 |
struct arc *a; |
533 |
|
534 |
assert(old != from); |
535 |
|
536 |
for (a = old->outs; a != NULL; a = a->outchain) |
537 |
newarc(nfa, type, a->co, from, to); |
538 |
} |
539 |
|
540 |
/* |
541 |
- delsub - delete a sub-NFA, updating subre pointers if necessary |
542 |
* This uses a recursive traversal of the sub-NFA, marking already-seen |
543 |
* states using their tmp pointer. |
544 |
^ static VOID delsub(struct nfa *, struct state *, struct state *); |
545 |
*/ |
546 |
static VOID |
547 |
delsub(nfa, lp, rp) |
548 |
struct nfa *nfa; |
549 |
struct state *lp; /* the sub-NFA goes from here... */ |
550 |
struct state *rp; /* ...to here, *not* inclusive */ |
551 |
{ |
552 |
assert(lp != rp); |
553 |
|
554 |
rp->tmp = rp; /* mark end */ |
555 |
|
556 |
deltraverse(nfa, lp, lp); |
557 |
assert(lp->nouts == 0 && rp->nins == 0); /* did the job */ |
558 |
assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */ |
559 |
|
560 |
rp->tmp = NULL; /* unmark end */ |
561 |
lp->tmp = NULL; /* and begin, marked by deltraverse */ |
562 |
} |
563 |
|
564 |
/* |
565 |
- deltraverse - the recursive heart of delsub |
566 |
* This routine's basic job is to destroy all out-arcs of the state. |
567 |
^ static VOID deltraverse(struct nfa *, struct state *, struct state *); |
568 |
*/ |
569 |
static VOID |
570 |
deltraverse(nfa, leftend, s) |
571 |
struct nfa *nfa; |
572 |
struct state *leftend; |
573 |
struct state *s; |
574 |
{ |
575 |
struct arc *a; |
576 |
struct state *to; |
577 |
|
578 |
if (s->nouts == 0) |
579 |
return; /* nothing to do */ |
580 |
if (s->tmp != NULL) |
581 |
return; /* already in progress */ |
582 |
|
583 |
s->tmp = s; /* mark as in progress */ |
584 |
|
585 |
while ((a = s->outs) != NULL) { |
586 |
to = a->to; |
587 |
deltraverse(nfa, leftend, to); |
588 |
assert(to->nouts == 0 || to->tmp != NULL); |
589 |
freearc(nfa, a); |
590 |
if (to->nins == 0 && to->tmp == NULL) { |
591 |
assert(to->nouts == 0); |
592 |
freestate(nfa, to); |
593 |
} |
594 |
} |
595 |
|
596 |
assert(s->no != FREESTATE); /* we're still here */ |
597 |
assert(s == leftend || s->nins != 0); /* and still reachable */ |
598 |
assert(s->nouts == 0); /* but have no outarcs */ |
599 |
|
600 |
s->tmp = NULL; /* we're done here */ |
601 |
} |
602 |
|
603 |
/* |
604 |
- dupnfa - duplicate sub-NFA |
605 |
* Another recursive traversal, this time using tmp to point to duplicates |
606 |
* as well as mark already-seen states. (You knew there was a reason why |
607 |
* it's a state pointer, didn't you? :-)) |
608 |
^ static VOID dupnfa(struct nfa *, struct state *, struct state *, |
609 |
^ struct state *, struct state *); |
610 |
*/ |
611 |
static VOID |
612 |
dupnfa(nfa, start, stop, from, to) |
613 |
struct nfa *nfa; |
614 |
struct state *start; /* duplicate of subNFA starting here */ |
615 |
struct state *stop; /* and stopping here */ |
616 |
struct state *from; /* stringing duplicate from here */ |
617 |
struct state *to; /* to here */ |
618 |
{ |
619 |
if (start == stop) { |
620 |
newarc(nfa, EMPTY, 0, from, to); |
621 |
return; |
622 |
} |
623 |
|
624 |
stop->tmp = to; |
625 |
duptraverse(nfa, start, from); |
626 |
/* done, except for clearing out the tmp pointers */ |
627 |
|
628 |
stop->tmp = NULL; |
629 |
cleartraverse(nfa, start); |
630 |
} |
631 |
|
632 |
/* |
633 |
- duptraverse - recursive heart of dupnfa |
634 |
^ static VOID duptraverse(struct nfa *, struct state *, struct state *); |
635 |
*/ |
636 |
static VOID |
637 |
duptraverse(nfa, s, stmp) |
638 |
struct nfa *nfa; |
639 |
struct state *s; |
640 |
struct state *stmp; /* s's duplicate, or NULL */ |
641 |
{ |
642 |
struct arc *a; |
643 |
|
644 |
if (s->tmp != NULL) |
645 |
return; /* already done */ |
646 |
|
647 |
s->tmp = (stmp == NULL) ? newstate(nfa) : stmp; |
648 |
if (s->tmp == NULL) { |
649 |
assert(NISERR()); |
650 |
return; |
651 |
} |
652 |
|
653 |
for (a = s->outs; a != NULL && !NISERR(); a = a->outchain) { |
654 |
duptraverse(nfa, a->to, (struct state *)NULL); |
655 |
assert(a->to->tmp != NULL); |
656 |
cparc(nfa, a, s->tmp, a->to->tmp); |
657 |
} |
658 |
} |
659 |
|
660 |
/* |
661 |
- cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set |
662 |
^ static VOID cleartraverse(struct nfa *, struct state *); |
663 |
*/ |
664 |
static VOID |
665 |
cleartraverse(nfa, s) |
666 |
struct nfa *nfa; |
667 |
struct state *s; |
668 |
{ |
669 |
struct arc *a; |
670 |
|
671 |
if (s->tmp == NULL) |
672 |
return; |
673 |
s->tmp = NULL; |
674 |
|
675 |
for (a = s->outs; a != NULL; a = a->outchain) |
676 |
cleartraverse(nfa, a->to); |
677 |
} |
678 |
|
679 |
/* |
680 |
- specialcolors - fill in special colors for an NFA |
681 |
^ static VOID specialcolors(struct nfa *); |
682 |
*/ |
683 |
static VOID |
684 |
specialcolors(nfa) |
685 |
struct nfa *nfa; |
686 |
{ |
687 |
/* false colors for BOS, BOL, EOS, EOL */ |
688 |
if (nfa->parent == NULL) { |
689 |
nfa->bos[0] = pseudocolor(nfa->cm); |
690 |
nfa->bos[1] = pseudocolor(nfa->cm); |
691 |
nfa->eos[0] = pseudocolor(nfa->cm); |
692 |
nfa->eos[1] = pseudocolor(nfa->cm); |
693 |
} else { |
694 |
assert(nfa->parent->bos[0] != COLORLESS); |
695 |
nfa->bos[0] = nfa->parent->bos[0]; |
696 |
assert(nfa->parent->bos[1] != COLORLESS); |
697 |
nfa->bos[1] = nfa->parent->bos[1]; |
698 |
assert(nfa->parent->eos[0] != COLORLESS); |
699 |
nfa->eos[0] = nfa->parent->eos[0]; |
700 |
assert(nfa->parent->eos[1] != COLORLESS); |
701 |
nfa->eos[1] = nfa->parent->eos[1]; |
702 |
} |
703 |
} |
704 |
|
705 |
/* |
706 |
- optimize - optimize an NFA |
707 |
^ static long optimize(struct nfa *, FILE *); |
708 |
*/ |
709 |
static long /* re_info bits */ |
710 |
optimize(nfa, f) |
711 |
struct nfa *nfa; |
712 |
FILE *f; /* for debug output; NULL none */ |
713 |
{ |
714 |
int verbose = (f != NULL) ? 1 : 0; |
715 |
|
716 |
if (verbose) |
717 |
fprintf(f, "\ninitial cleanup:\n"); |
718 |
cleanup(nfa); /* may simplify situation */ |
719 |
if (verbose) |
720 |
dumpnfa(nfa, f); |
721 |
if (verbose) |
722 |
fprintf(f, "\nempties:\n"); |
723 |
fixempties(nfa, f); /* get rid of EMPTY arcs */ |
724 |
if (verbose) |
725 |
fprintf(f, "\nconstraints:\n"); |
726 |
pullback(nfa, f); /* pull back constraints backward */ |
727 |
pushfwd(nfa, f); /* push fwd constraints forward */ |
728 |
if (verbose) |
729 |
fprintf(f, "\nfinal cleanup:\n"); |
730 |
cleanup(nfa); /* final tidying */ |
731 |
return analyze(nfa); /* and analysis */ |
732 |
} |
733 |
|
734 |
/* |
735 |
- pullback - pull back constraints backward to (with luck) eliminate them |
736 |
^ static VOID pullback(struct nfa *, FILE *); |
737 |
*/ |
738 |
static VOID |
739 |
pullback(nfa, f) |
740 |
struct nfa *nfa; |
741 |
FILE *f; /* for debug output; NULL none */ |
742 |
{ |
743 |
struct state *s; |
744 |
struct state *nexts; |
745 |
struct arc *a; |
746 |
struct arc *nexta; |
747 |
int progress; |
748 |
|
749 |
/* find and pull until there are no more */ |
750 |
do { |
751 |
progress = 0; |
752 |
for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { |
753 |
nexts = s->next; |
754 |
for (a = s->outs; a != NULL && !NISERR(); a = nexta) { |
755 |
nexta = a->outchain; |
756 |
if (a->type == '^' || a->type == BEHIND) |
757 |
if (pull(nfa, a)) |
758 |
progress = 1; |
759 |
assert(nexta == NULL || s->no != FREESTATE); |
760 |
} |
761 |
} |
762 |
if (progress && f != NULL) |
763 |
dumpnfa(nfa, f); |
764 |
} while (progress && !NISERR()); |
765 |
if (NISERR()) |
766 |
return; |
767 |
|
768 |
for (a = nfa->pre->outs; a != NULL; a = nexta) { |
769 |
nexta = a->outchain; |
770 |
if (a->type == '^') { |
771 |
assert(a->co == 0 || a->co == 1); |
772 |
newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to); |
773 |
freearc(nfa, a); |
774 |
} |
775 |
} |
776 |
} |
777 |
|
778 |
/* |
779 |
- pull - pull a back constraint backward past its source state |
780 |
* A significant property of this function is that it deletes at most |
781 |
* one state -- the constraint's from state -- and only if the constraint |
782 |
* was that state's last outarc. |
783 |
^ static int pull(struct nfa *, struct arc *); |
784 |
*/ |
785 |
static int /* 0 couldn't, 1 could */ |
786 |
pull(nfa, con) |
787 |
struct nfa *nfa; |
788 |
struct arc *con; |
789 |
{ |
790 |
struct state *from = con->from; |
791 |
struct state *to = con->to; |
792 |
struct arc *a; |
793 |
struct arc *nexta; |
794 |
struct state *s; |
795 |
|
796 |
if (from == to) { /* circular constraint is pointless */ |
797 |
freearc(nfa, con); |
798 |
return 1; |
799 |
} |
800 |
if (from->flag) /* can't pull back beyond start */ |
801 |
return 0; |
802 |
if (from->nins == 0) { /* unreachable */ |
803 |
freearc(nfa, con); |
804 |
return 1; |
805 |
} |
806 |
|
807 |
/* first, clone from state if necessary to avoid other outarcs */ |
808 |
if (from->nouts > 1) { |
809 |
s = newstate(nfa); |
810 |
if (NISERR()) |
811 |
return 0; |
812 |
assert(to != from); /* con is not an inarc */ |
813 |
copyins(nfa, from, s); /* duplicate inarcs */ |
814 |
cparc(nfa, con, s, to); /* move constraint arc */ |
815 |
freearc(nfa, con); |
816 |
from = s; |
817 |
con = from->outs; |
818 |
} |
819 |
assert(from->nouts == 1); |
820 |
|
821 |
/* propagate the constraint into the from state's inarcs */ |
822 |
for (a = from->ins; a != NULL; a = nexta) { |
823 |
nexta = a->inchain; |
824 |
switch (combine(con, a)) { |
825 |
case INCOMPATIBLE: /* destroy the arc */ |
826 |
freearc(nfa, a); |
827 |
break; |
828 |
case SATISFIED: /* no action needed */ |
829 |
break; |
830 |
case COMPATIBLE: /* swap the two arcs, more or less */ |
831 |
s = newstate(nfa); |
832 |
if (NISERR()) |
833 |
return 0; |
834 |
cparc(nfa, a, s, to); /* anticipate move */ |
835 |
cparc(nfa, con, a->from, s); |
836 |
if (NISERR()) |
837 |
return 0; |
838 |
freearc(nfa, a); |
839 |
break; |
840 |
default: |
841 |
assert(NOTREACHED); |
842 |
break; |
843 |
} |
844 |
} |
845 |
|
846 |
/* remaining inarcs, if any, incorporate the constraint */ |
847 |
moveins(nfa, from, to); |
848 |
dropstate(nfa, from); /* will free the constraint */ |
849 |
return 1; |
850 |
} |
851 |
|
852 |
/* |
853 |
- pushfwd - push forward constraints forward to (with luck) eliminate them |
854 |
^ static VOID pushfwd(struct nfa *, FILE *); |
855 |
*/ |
856 |
static VOID |
857 |
pushfwd(nfa, f) |
858 |
struct nfa *nfa; |
859 |
FILE *f; /* for debug output; NULL none */ |
860 |
{ |
861 |
struct state *s; |
862 |
struct state *nexts; |
863 |
struct arc *a; |
864 |
struct arc *nexta; |
865 |
int progress; |
866 |
|
867 |
/* find and push until there are no more */ |
868 |
do { |
869 |
progress = 0; |
870 |
for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { |
871 |
nexts = s->next; |
872 |
for (a = s->ins; a != NULL && !NISERR(); a = nexta) { |
873 |
nexta = a->inchain; |
874 |
if (a->type == '$' || a->type == AHEAD) |
875 |
if (push(nfa, a)) |
876 |
progress = 1; |
877 |
assert(nexta == NULL || s->no != FREESTATE); |
878 |
} |
879 |
} |
880 |
if (progress && f != NULL) |
881 |
dumpnfa(nfa, f); |
882 |
} while (progress && !NISERR()); |
883 |
if (NISERR()) |
884 |
return; |
885 |
|
886 |
for (a = nfa->post->ins; a != NULL; a = nexta) { |
887 |
nexta = a->inchain; |
888 |
if (a->type == '$') { |
889 |
assert(a->co == 0 || a->co == 1); |
890 |
newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to); |
891 |
freearc(nfa, a); |
892 |
} |
893 |
} |
894 |
} |
895 |
|
896 |
/* |
897 |
- push - push a forward constraint forward past its destination state |
898 |
* A significant property of this function is that it deletes at most |
899 |
* one state -- the constraint's to state -- and only if the constraint |
900 |
* was that state's last inarc. |
901 |
^ static int push(struct nfa *, struct arc *); |
902 |
*/ |
903 |
static int /* 0 couldn't, 1 could */ |
904 |
push(nfa, con) |
905 |
struct nfa *nfa; |
906 |
struct arc *con; |
907 |
{ |
908 |
struct state *from = con->from; |
909 |
struct state *to = con->to; |
910 |
struct arc *a; |
911 |
struct arc *nexta; |
912 |
struct state *s; |
913 |
|
914 |
if (to == from) { /* circular constraint is pointless */ |
915 |
freearc(nfa, con); |
916 |
return 1; |
917 |
} |
918 |
if (to->flag) /* can't push forward beyond end */ |
919 |
return 0; |
920 |
if (to->nouts == 0) { /* dead end */ |
921 |
freearc(nfa, con); |
922 |
return 1; |
923 |
} |
924 |
|
925 |
/* first, clone to state if necessary to avoid other inarcs */ |
926 |
if (to->nins > 1) { |
927 |
s = newstate(nfa); |
928 |
if (NISERR()) |
929 |
return 0; |
930 |
copyouts(nfa, to, s); /* duplicate outarcs */ |
931 |
cparc(nfa, con, from, s); /* move constraint */ |
932 |
freearc(nfa, con); |
933 |
to = s; |
934 |
con = to->ins; |
935 |
} |
936 |
assert(to->nins == 1); |
937 |
|
938 |
/* propagate the constraint into the to state's outarcs */ |
939 |
for (a = to->outs; a != NULL; a = nexta) { |
940 |
nexta = a->outchain; |
941 |
switch (combine(con, a)) { |
942 |
case INCOMPATIBLE: /* destroy the arc */ |
943 |
freearc(nfa, a); |
944 |
break; |
945 |
case SATISFIED: /* no action needed */ |
946 |
break; |
947 |
case COMPATIBLE: /* swap the two arcs, more or less */ |
948 |
s = newstate(nfa); |
949 |
if (NISERR()) |
950 |
return 0; |
951 |
cparc(nfa, con, s, a->to); /* anticipate move */ |
952 |
cparc(nfa, a, from, s); |
953 |
if (NISERR()) |
954 |
return 0; |
955 |
freearc(nfa, a); |
956 |
break; |
957 |
default: |
958 |
assert(NOTREACHED); |
959 |
break; |
960 |
} |
961 |
} |
962 |
|
963 |
/* remaining outarcs, if any, incorporate the constraint */ |
964 |
moveouts(nfa, to, from); |
965 |
dropstate(nfa, to); /* will free the constraint */ |
966 |
return 1; |
967 |
} |
968 |
|
969 |
/* |
970 |
- combine - constraint lands on an arc, what happens? |
971 |
^ #def INCOMPATIBLE 1 // destroys arc |
972 |
^ #def SATISFIED 2 // constraint satisfied |
973 |
^ #def COMPATIBLE 3 // compatible but not satisfied yet |
974 |
^ static int combine(struct arc *, struct arc *); |
975 |
*/ |
976 |
static int |
977 |
combine(con, a) |
978 |
struct arc *con; |
979 |
struct arc *a; |
980 |
{ |
981 |
# define CA(ct,at) (((ct)<<CHAR_BIT) | (at)) |
982 |
|
983 |
switch (CA(con->type, a->type)) { |
984 |
case CA('^', PLAIN): /* newlines are handled separately */ |
985 |
case CA('$', PLAIN): |
986 |
return INCOMPATIBLE; |
987 |
break; |
988 |
case CA(AHEAD, PLAIN): /* color constraints meet colors */ |
989 |
case CA(BEHIND, PLAIN): |
990 |
if (con->co == a->co) |
991 |
return SATISFIED; |
992 |
return INCOMPATIBLE; |
993 |
break; |
994 |
case CA('^', '^'): /* collision, similar constraints */ |
995 |
case CA('$', '$'): |
996 |
case CA(AHEAD, AHEAD): |
997 |
case CA(BEHIND, BEHIND): |
998 |
if (con->co == a->co) /* true duplication */ |
999 |
return SATISFIED; |
1000 |
return INCOMPATIBLE; |
1001 |
break; |
1002 |
case CA('^', BEHIND): /* collision, dissimilar constraints */ |
1003 |
case CA(BEHIND, '^'): |
1004 |
case CA('$', AHEAD): |
1005 |
case CA(AHEAD, '$'): |
1006 |
return INCOMPATIBLE; |
1007 |
break; |
1008 |
case CA('^', '$'): /* constraints passing each other */ |
1009 |
case CA('^', AHEAD): |
1010 |
case CA(BEHIND, '$'): |
1011 |
case CA(BEHIND, AHEAD): |
1012 |
case CA('$', '^'): |
1013 |
case CA('$', BEHIND): |
1014 |
case CA(AHEAD, '^'): |
1015 |
case CA(AHEAD, BEHIND): |
1016 |
case CA('^', LACON): |
1017 |
case CA(BEHIND, LACON): |
1018 |
case CA('$', LACON): |
1019 |
case CA(AHEAD, LACON): |
1020 |
return COMPATIBLE; |
1021 |
break; |
1022 |
} |
1023 |
assert(NOTREACHED); |
1024 |
return INCOMPATIBLE; /* for benefit of blind compilers */ |
1025 |
} |
1026 |
|
1027 |
/* |
1028 |
- fixempties - get rid of EMPTY arcs |
1029 |
^ static VOID fixempties(struct nfa *, FILE *); |
1030 |
*/ |
1031 |
static VOID |
1032 |
fixempties(nfa, f) |
1033 |
struct nfa *nfa; |
1034 |
FILE *f; /* for debug output; NULL none */ |
1035 |
{ |
1036 |
struct state *s; |
1037 |
struct state *nexts; |
1038 |
struct arc *a; |
1039 |
struct arc *nexta; |
1040 |
int progress; |
1041 |
|
1042 |
/* find and eliminate empties until there are no more */ |
1043 |
do { |
1044 |
progress = 0; |
1045 |
for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { |
1046 |
nexts = s->next; |
1047 |
for (a = s->outs; a != NULL && !NISERR(); a = nexta) { |
1048 |
nexta = a->outchain; |
1049 |
if (a->type == EMPTY && unempty(nfa, a)) |
1050 |
progress = 1; |
1051 |
assert(nexta == NULL || s->no != FREESTATE); |
1052 |
} |
1053 |
} |
1054 |
if (progress && f != NULL) |
1055 |
dumpnfa(nfa, f); |
1056 |
} while (progress && !NISERR()); |
1057 |
} |
1058 |
|
1059 |
/* |
1060 |
- unempty - optimize out an EMPTY arc, if possible |
1061 |
* Actually, as it stands this function always succeeds, but the return |
1062 |
* value is kept with an eye on possible future changes. |
1063 |
^ static int unempty(struct nfa *, struct arc *); |
1064 |
*/ |
1065 |
static int /* 0 couldn't, 1 could */ |
1066 |
unempty(nfa, a) |
1067 |
struct nfa *nfa; |
1068 |
struct arc *a; |
1069 |
{ |
1070 |
struct state *from = a->from; |
1071 |
struct state *to = a->to; |
1072 |
int usefrom; /* work on from, as opposed to to? */ |
1073 |
|
1074 |
assert(a->type == EMPTY); |
1075 |
assert(from != nfa->pre && to != nfa->post); |
1076 |
|
1077 |
if (from == to) { /* vacuous loop */ |
1078 |
freearc(nfa, a); |
1079 |
return 1; |
1080 |
} |
1081 |
|
1082 |
/* decide which end to work on */ |
1083 |
usefrom = 1; /* default: attack from */ |
1084 |
if (from->nouts > to->nins) |
1085 |
usefrom = 0; |
1086 |
else if (from->nouts == to->nins) { |
1087 |
/* decide on secondary issue: move/copy fewest arcs */ |
1088 |
if (from->nins > to->nouts) |
1089 |
usefrom = 0; |
1090 |
} |
1091 |
|
1092 |
freearc(nfa, a); |
1093 |
if (usefrom) { |
1094 |
if (from->nouts == 0) { |
1095 |
/* was the state's only outarc */ |
1096 |
moveins(nfa, from, to); |
1097 |
freestate(nfa, from); |
1098 |
} else |
1099 |
copyins(nfa, from, to); |
1100 |
} else { |
1101 |
if (to->nins == 0) { |
1102 |
/* was the state's only inarc */ |
1103 |
moveouts(nfa, to, from); |
1104 |
freestate(nfa, to); |
1105 |
} else |
1106 |
copyouts(nfa, to, from); |
1107 |
} |
1108 |
|
1109 |
return 1; |
1110 |
} |
1111 |
|
1112 |
/* |
1113 |
- cleanup - clean up NFA after optimizations |
1114 |
^ static VOID cleanup(struct nfa *); |
1115 |
*/ |
1116 |
static VOID |
1117 |
cleanup(nfa) |
1118 |
struct nfa *nfa; |
1119 |
{ |
1120 |
struct state *s; |
1121 |
struct state *nexts; |
1122 |
int n; |
1123 |
|
1124 |
/* clear out unreachable or dead-end states */ |
1125 |
/* use pre to mark reachable, then post to mark can-reach-post */ |
1126 |
markreachable(nfa, nfa->pre, (struct state *)NULL, nfa->pre); |
1127 |
markcanreach(nfa, nfa->post, nfa->pre, nfa->post); |
1128 |
for (s = nfa->states; s != NULL; s = nexts) { |
1129 |
nexts = s->next; |
1130 |
if (s->tmp != nfa->post && !s->flag) |
1131 |
dropstate(nfa, s); |
1132 |
} |
1133 |
assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post); |
1134 |
cleartraverse(nfa, nfa->pre); |
1135 |
assert(nfa->post->nins == 0 || nfa->post->tmp == NULL); |
1136 |
/* the nins==0 (final unreachable) case will be caught later */ |
1137 |
|
1138 |
/* renumber surviving states */ |
1139 |
n = 0; |
1140 |
for (s = nfa->states; s != NULL; s = s->next) |
1141 |
s->no = n++; |
1142 |
nfa->nstates = n; |
1143 |
} |
1144 |
|
1145 |
/* |
1146 |
- markreachable - recursive marking of reachable states |
1147 |
^ static VOID markreachable(struct nfa *, struct state *, struct state *, |
1148 |
^ struct state *); |
1149 |
*/ |
1150 |
static VOID |
1151 |
markreachable(nfa, s, okay, mark) |
1152 |
struct nfa *nfa; |
1153 |
struct state *s; |
1154 |
struct state *okay; /* consider only states with this mark */ |
1155 |
struct state *mark; /* the value to mark with */ |
1156 |
{ |
1157 |
struct arc *a; |
1158 |
|
1159 |
if (s->tmp != okay) |
1160 |
return; |
1161 |
s->tmp = mark; |
1162 |
|
1163 |
for (a = s->outs; a != NULL; a = a->outchain) |
1164 |
markreachable(nfa, a->to, okay, mark); |
1165 |
} |
1166 |
|
1167 |
/* |
1168 |
- markcanreach - recursive marking of states which can reach here |
1169 |
^ static VOID markcanreach(struct nfa *, struct state *, struct state *, |
1170 |
^ struct state *); |
1171 |
*/ |
1172 |
static VOID |
1173 |
markcanreach(nfa, s, okay, mark) |
1174 |
struct nfa *nfa; |
1175 |
struct state *s; |
1176 |
struct state *okay; /* consider only states with this mark */ |
1177 |
struct state *mark; /* the value to mark with */ |
1178 |
{ |
1179 |
struct arc *a; |
1180 |
|
1181 |
if (s->tmp != okay) |
1182 |
return; |
1183 |
s->tmp = mark; |
1184 |
|
1185 |
for (a = s->ins; a != NULL; a = a->inchain) |
1186 |
markcanreach(nfa, a->from, okay, mark); |
1187 |
} |
1188 |
|
1189 |
/* |
1190 |
- analyze - ascertain potentially-useful facts about an optimized NFA |
1191 |
^ static long analyze(struct nfa *); |
1192 |
*/ |
1193 |
static long /* re_info bits to be ORed in */ |
1194 |
analyze(nfa) |
1195 |
struct nfa *nfa; |
1196 |
{ |
1197 |
struct arc *a; |
1198 |
struct arc *aa; |
1199 |
|
1200 |
if (nfa->pre->outs == NULL) |
1201 |
return REG_UIMPOSSIBLE; |
1202 |
for (a = nfa->pre->outs; a != NULL; a = a->outchain) |
1203 |
for (aa = a->to->outs; aa != NULL; aa = aa->outchain) |
1204 |
if (aa->to == nfa->post) |
1205 |
return REG_UEMPTYMATCH; |
1206 |
return 0; |
1207 |
} |
1208 |
|
1209 |
/* |
1210 |
- compact - compact an NFA |
1211 |
^ static VOID compact(struct nfa *, struct cnfa *); |
1212 |
*/ |
1213 |
static VOID |
1214 |
compact(nfa, cnfa) |
1215 |
struct nfa *nfa; |
1216 |
struct cnfa *cnfa; |
1217 |
{ |
1218 |
struct state *s; |
1219 |
struct arc *a; |
1220 |
size_t nstates; |
1221 |
size_t narcs; |
1222 |
struct carc *ca; |
1223 |
struct carc *first; |
1224 |
|
1225 |
assert (!NISERR()); |
1226 |
|
1227 |
nstates = 0; |
1228 |
narcs = 0; |
1229 |
for (s = nfa->states; s != NULL; s = s->next) { |
1230 |
nstates++; |
1231 |
narcs += 1 + s->nouts + 1; |
1232 |
/* 1 as a fake for flags, nouts for arcs, 1 as endmarker */ |
1233 |
} |
1234 |
|
1235 |
cnfa->states = (struct carc **)MALLOC(nstates * sizeof(struct carc *)); |
1236 |
cnfa->arcs = (struct carc *)MALLOC(narcs * sizeof(struct carc)); |
1237 |
if (cnfa->states == NULL || cnfa->arcs == NULL) { |
1238 |
if (cnfa->states != NULL) |
1239 |
FREE(cnfa->states); |
1240 |
if (cnfa->arcs != NULL) |
1241 |
FREE(cnfa->arcs); |
1242 |
NERR(REG_ESPACE); |
1243 |
return; |
1244 |
} |
1245 |
cnfa->nstates = nstates; |
1246 |
cnfa->pre = nfa->pre->no; |
1247 |
cnfa->post = nfa->post->no; |
1248 |
cnfa->bos[0] = nfa->bos[0]; |
1249 |
cnfa->bos[1] = nfa->bos[1]; |
1250 |
cnfa->eos[0] = nfa->eos[0]; |
1251 |
cnfa->eos[1] = nfa->eos[1]; |
1252 |
cnfa->ncolors = maxcolor(nfa->cm) + 1; |
1253 |
cnfa->flags = 0; |
1254 |
|
1255 |
ca = cnfa->arcs; |
1256 |
for (s = nfa->states; s != NULL; s = s->next) { |
1257 |
assert((size_t)s->no < nstates); |
1258 |
cnfa->states[s->no] = ca; |
1259 |
ca->co = 0; /* clear and skip flags "arc" */ |
1260 |
ca++; |
1261 |
first = ca; |
1262 |
for (a = s->outs; a != NULL; a = a->outchain) |
1263 |
switch (a->type) { |
1264 |
case PLAIN: |
1265 |
ca->co = a->co; |
1266 |
ca->to = a->to->no; |
1267 |
ca++; |
1268 |
break; |
1269 |
case LACON: |
1270 |
assert(s->no != cnfa->pre); |
1271 |
ca->co = (color)(cnfa->ncolors + a->co); |
1272 |
ca->to = a->to->no; |
1273 |
ca++; |
1274 |
cnfa->flags |= HASLACONS; |
1275 |
break; |
1276 |
default: |
1277 |
assert(NOTREACHED); |
1278 |
break; |
1279 |
} |
1280 |
carcsort(first, ca-1); |
1281 |
ca->co = COLORLESS; |
1282 |
ca->to = 0; |
1283 |
ca++; |
1284 |
} |
1285 |
assert(ca == &cnfa->arcs[narcs]); |
1286 |
assert(cnfa->nstates != 0); |
1287 |
|
1288 |
/* mark no-progress states */ |
1289 |
for (a = nfa->pre->outs; a != NULL; a = a->outchain) |
1290 |
cnfa->states[a->to->no]->co = 1; |
1291 |
cnfa->states[nfa->pre->no]->co = 1; |
1292 |
} |
1293 |
|
1294 |
/* |
1295 |
- carcsort - sort compacted-NFA arcs by color |
1296 |
* Really dumb algorithm, but if the list is long enough for that to matter, |
1297 |
* you're in real trouble anyway. |
1298 |
^ static VOID carcsort(struct carc *, struct carc *); |
1299 |
*/ |
1300 |
static VOID |
1301 |
carcsort(first, last) |
1302 |
struct carc *first; |
1303 |
struct carc *last; |
1304 |
{ |
1305 |
struct carc *p; |
1306 |
struct carc *q; |
1307 |
struct carc tmp; |
1308 |
|
1309 |
if (last - first <= 1) |
1310 |
return; |
1311 |
|
1312 |
for (p = first; p <= last; p++) |
1313 |
for (q = p; q <= last; q++) |
1314 |
if (p->co > q->co || |
1315 |
(p->co == q->co && p->to > q->to)) { |
1316 |
assert(p != q); |
1317 |
tmp = *p; |
1318 |
*p = *q; |
1319 |
*q = tmp; |
1320 |
} |
1321 |
} |
1322 |
|
1323 |
/* |
1324 |
- freecnfa - free a compacted NFA |
1325 |
^ static VOID freecnfa(struct cnfa *); |
1326 |
*/ |
1327 |
static VOID |
1328 |
freecnfa(cnfa) |
1329 |
struct cnfa *cnfa; |
1330 |
{ |
1331 |
assert(cnfa->nstates != 0); /* not empty already */ |
1332 |
cnfa->nstates = 0; |
1333 |
FREE(cnfa->states); |
1334 |
FREE(cnfa->arcs); |
1335 |
} |
1336 |
|
1337 |
/* |
1338 |
- dumpnfa - dump an NFA in human-readable form |
1339 |
^ static VOID dumpnfa(struct nfa *, FILE *); |
1340 |
*/ |
1341 |
static VOID |
1342 |
dumpnfa(nfa, f) |
1343 |
struct nfa *nfa; |
1344 |
FILE *f; |
1345 |
{ |
1346 |
#ifdef REG_DEBUG |
1347 |
struct state *s; |
1348 |
|
1349 |
fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no); |
1350 |
if (nfa->bos[0] != COLORLESS) |
1351 |
fprintf(f, ", bos [%ld]", (long)nfa->bos[0]); |
1352 |
if (nfa->bos[1] != COLORLESS) |
1353 |
fprintf(f, ", bol [%ld]", (long)nfa->bos[1]); |
1354 |
if (nfa->eos[0] != COLORLESS) |
1355 |
fprintf(f, ", eos [%ld]", (long)nfa->eos[0]); |
1356 |
if (nfa->eos[1] != COLORLESS) |
1357 |
fprintf(f, ", eol [%ld]", (long)nfa->eos[1]); |
1358 |
fprintf(f, "\n"); |
1359 |
for (s = nfa->states; s != NULL; s = s->next) |
1360 |
dumpstate(s, f); |
1361 |
if (nfa->parent == NULL) |
1362 |
dumpcolors(nfa->cm, f); |
1363 |
fflush(f); |
1364 |
#endif |
1365 |
} |
1366 |
|
1367 |
#ifdef REG_DEBUG /* subordinates of dumpnfa */ |
1368 |
/* |
1369 |
^ #ifdef REG_DEBUG |
1370 |
*/ |
1371 |
|
1372 |
/* |
1373 |
- dumpstate - dump an NFA state in human-readable form |
1374 |
^ static VOID dumpstate(struct state *, FILE *); |
1375 |
*/ |
1376 |
static VOID |
1377 |
dumpstate(s, f) |
1378 |
struct state *s; |
1379 |
FILE *f; |
1380 |
{ |
1381 |
struct arc *a; |
1382 |
|
1383 |
fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "", |
1384 |
(s->flag) ? s->flag : '.'); |
1385 |
if (s->prev != NULL && s->prev->next != s) |
1386 |
fprintf(f, "\tstate chain bad\n"); |
1387 |
if (s->nouts == 0) |
1388 |
fprintf(f, "\tno out arcs\n"); |
1389 |
else |
1390 |
dumparcs(s, f); |
1391 |
fflush(f); |
1392 |
for (a = s->ins; a != NULL; a = a->inchain) { |
1393 |
if (a->to != s) |
1394 |
fprintf(f, "\tlink from %d to %d on %d's in-chain\n", |
1395 |
a->from->no, a->to->no, s->no); |
1396 |
} |
1397 |
} |
1398 |
|
1399 |
/* |
1400 |
- dumparcs - dump out-arcs in human-readable form |
1401 |
^ static VOID dumparcs(struct state *, FILE *); |
1402 |
*/ |
1403 |
static VOID |
1404 |
dumparcs(s, f) |
1405 |
struct state *s; |
1406 |
FILE *f; |
1407 |
{ |
1408 |
int pos; |
1409 |
|
1410 |
assert(s->nouts > 0); |
1411 |
/* printing arcs in reverse order is usually clearer */ |
1412 |
pos = dumprarcs(s->outs, s, f, 1); |
1413 |
if (pos != 1) |
1414 |
fprintf(f, "\n"); |
1415 |
} |
1416 |
|
1417 |
/* |
1418 |
- dumprarcs - dump remaining outarcs, recursively, in reverse order |
1419 |
^ static int dumprarcs(struct arc *, struct state *, FILE *, int); |
1420 |
*/ |
1421 |
static int /* resulting print position */ |
1422 |
dumprarcs(a, s, f, pos) |
1423 |
struct arc *a; |
1424 |
struct state *s; |
1425 |
FILE *f; |
1426 |
int pos; /* initial print position */ |
1427 |
{ |
1428 |
if (a->outchain != NULL) |
1429 |
pos = dumprarcs(a->outchain, s, f, pos); |
1430 |
dumparc(a, s, f); |
1431 |
if (pos == 5) { |
1432 |
fprintf(f, "\n"); |
1433 |
pos = 1; |
1434 |
} else |
1435 |
pos++; |
1436 |
return pos; |
1437 |
} |
1438 |
|
1439 |
/* |
1440 |
- dumparc - dump one outarc in readable form, including prefixing tab |
1441 |
^ static VOID dumparc(struct arc *, struct state *, FILE *); |
1442 |
*/ |
1443 |
static VOID |
1444 |
dumparc(a, s, f) |
1445 |
struct arc *a; |
1446 |
struct state *s; |
1447 |
FILE *f; |
1448 |
{ |
1449 |
struct arc *aa; |
1450 |
struct arcbatch *ab; |
1451 |
|
1452 |
fprintf(f, "\t"); |
1453 |
switch (a->type) { |
1454 |
case PLAIN: |
1455 |
fprintf(f, "[%ld]", (long)a->co); |
1456 |
break; |
1457 |
case AHEAD: |
1458 |
fprintf(f, ">%ld>", (long)a->co); |
1459 |
break; |
1460 |
case BEHIND: |
1461 |
fprintf(f, "<%ld<", (long)a->co); |
1462 |
break; |
1463 |
case LACON: |
1464 |
fprintf(f, ":%ld:", (long)a->co); |
1465 |
break; |
1466 |
case '^': |
1467 |
case '$': |
1468 |
fprintf(f, "%c%d", a->type, (int)a->co); |
1469 |
break; |
1470 |
case EMPTY: |
1471 |
break; |
1472 |
default: |
1473 |
fprintf(f, "0x%x/0%lo", a->type, (long)a->co); |
1474 |
break; |
1475 |
} |
1476 |
if (a->from != s) |
1477 |
fprintf(f, "?%d?", a->from->no); |
1478 |
for (ab = &a->from->oas; ab != NULL; ab = ab->next) { |
1479 |
for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++) |
1480 |
if (aa == a) |
1481 |
break; /* NOTE BREAK OUT */ |
1482 |
if (aa < &ab->a[ABSIZE]) /* propagate break */ |
1483 |
break; /* NOTE BREAK OUT */ |
1484 |
} |
1485 |
if (ab == NULL) |
1486 |
fprintf(f, "?!?"); /* not in allocated space */ |
1487 |
fprintf(f, "->"); |
1488 |
if (a->to == NULL) { |
1489 |
fprintf(f, "NULL"); |
1490 |
return; |
1491 |
} |
1492 |
fprintf(f, "%d", a->to->no); |
1493 |
for (aa = a->to->ins; aa != NULL; aa = aa->inchain) |
1494 |
if (aa == a) |
1495 |
break; /* NOTE BREAK OUT */ |
1496 |
if (aa == NULL) |
1497 |
fprintf(f, "?!?"); /* missing from in-chain */ |
1498 |
} |
1499 |
|
1500 |
/* |
1501 |
^ #endif |
1502 |
*/ |
1503 |
#endif /* ifdef REG_DEBUG */ |
1504 |
|
1505 |
/* |
1506 |
- dumpcnfa - dump a compacted NFA in human-readable form |
1507 |
^ static VOID dumpcnfa(struct cnfa *, FILE *); |
1508 |
*/ |
1509 |
static VOID |
1510 |
dumpcnfa(cnfa, f) |
1511 |
struct cnfa *cnfa; |
1512 |
FILE *f; |
1513 |
{ |
1514 |
#ifdef REG_DEBUG |
1515 |
int st; |
1516 |
|
1517 |
fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post); |
1518 |
if (cnfa->bos[0] != COLORLESS) |
1519 |
fprintf(f, ", bos [%ld]", (long)cnfa->bos[0]); |
1520 |
if (cnfa->bos[1] != COLORLESS) |
1521 |
fprintf(f, ", bol [%ld]", (long)cnfa->bos[1]); |
1522 |
if (cnfa->eos[0] != COLORLESS) |
1523 |
fprintf(f, ", eos [%ld]", (long)cnfa->eos[0]); |
1524 |
if (cnfa->eos[1] != COLORLESS) |
1525 |
fprintf(f, ", eol [%ld]", (long)cnfa->eos[1]); |
1526 |
if (cnfa->flags&HASLACONS) |
1527 |
fprintf(f, ", haslacons"); |
1528 |
fprintf(f, "\n"); |
1529 |
for (st = 0; st < cnfa->nstates; st++) |
1530 |
dumpcstate(st, cnfa->states[st], cnfa, f); |
1531 |
fflush(f); |
1532 |
#endif |
1533 |
} |
1534 |
|
1535 |
#ifdef REG_DEBUG /* subordinates of dumpcnfa */ |
1536 |
/* |
1537 |
^ #ifdef REG_DEBUG |
1538 |
*/ |
1539 |
|
1540 |
/* |
1541 |
- dumpcstate - dump a compacted-NFA state in human-readable form |
1542 |
^ static VOID dumpcstate(int, struct carc *, struct cnfa *, FILE *); |
1543 |
*/ |
1544 |
static VOID |
1545 |
dumpcstate(st, ca, cnfa, f) |
1546 |
int st; |
1547 |
struct carc *ca; |
1548 |
struct cnfa *cnfa; |
1549 |
FILE *f; |
1550 |
{ |
1551 |
int i; |
1552 |
int pos; |
1553 |
|
1554 |
fprintf(f, "%d%s", st, (ca[0].co) ? ":" : "."); |
1555 |
pos = 1; |
1556 |
for (i = 1; ca[i].co != COLORLESS; i++) { |
1557 |
if (ca[i].co < cnfa->ncolors) |
1558 |
fprintf(f, "\t[%ld]->%d", (long)ca[i].co, ca[i].to); |
1559 |
else |
1560 |
fprintf(f, "\t:%ld:->%d", (long)ca[i].co-cnfa->ncolors, |
1561 |
ca[i].to); |
1562 |
if (pos == 5) { |
1563 |
fprintf(f, "\n"); |
1564 |
pos = 1; |
1565 |
} else |
1566 |
pos++; |
1567 |
} |
1568 |
if (i == 1 || pos != 1) |
1569 |
fprintf(f, "\n"); |
1570 |
fflush(f); |
1571 |
} |
1572 |
|
1573 |
/* |
1574 |
^ #endif |
1575 |
*/ |
1576 |
#endif /* ifdef REG_DEBUG */ |
1577 |
|
1578 |
/* End of rege_nfa.c */ |