/[dtapublic]/projs/trunk/shared_source/c_tcl_base_7_5_w_mods/regc_lex.c
ViewVC logotype

Contents of /projs/trunk/shared_source/c_tcl_base_7_5_w_mods/regc_lex.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 71 - (show annotations) (download)
Sat Nov 5 11:07:06 2016 UTC (8 years, 1 month ago) by dashley
File MIME type: text/plain
File size: 24535 byte(s)
Set EOL properties appropriately to facilitate simultaneous Linux and Windows development.
1 /* $Header$ */
2 /*
3 * lexical analyzer
4 * This file is #included by regcomp.c.
5 *
6 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
7 *
8 * Development of this software was funded, in part, by Cray Research Inc.,
9 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
10 * Corporation, none of whom are responsible for the results. The author
11 * thanks all of them.
12 *
13 * Redistribution and use in source and binary forms -- with or without
14 * modification -- are permitted for any purpose, provided that
15 * redistributions in source form retain this entire copyright notice and
16 * indicate the origin and nature of any modifications.
17 *
18 * I'd appreciate being given credit for this package in the documentation
19 * of software which uses it, but that is not a requirement.
20 *
21 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
22 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
23 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
24 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
27 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
30 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 */
33
/*
 * Scanning macros.  All of them expect a variable named v (a pointer to the
 * lexer state) to be in scope: v->now is the next unread chr and v->stop is
 * one past the last chr of the text being scanned.
 */
#define ATEOS()         (v->stop <= v->now)             /* nothing left? */
#define HAVE(n)         ((n) <= v->stop - v->now)       /* >= n chrs left? */
/* do the next one, two or three chrs match the given characters? */
#define NEXT1(c)        (!ATEOS() && v->now[0] == CHR(c))
#define NEXT2(a,b)      (HAVE(2) && v->now[0] == CHR(a) && v->now[1] == CHR(b))
#define NEXT3(a,b,c)    (HAVE(3) && v->now[0] == CHR(a) && \
                                v->now[1] == CHR(b) && \
                                v->now[2] == CHR(c))
/* record the token type (and optionally its value) of the next token */
#define SET(c)          (v->nexttype = (c))
#define SETV(c, n)      (v->nexttype = (c), v->nextvalue = (n))
/* record the token and return success (1) from the enclosing function */
#define RET(c)          return (SET(c), 1)
#define RETV(c, n)      return (SETV(c, n), 1)
/* record error e and return failure (0) from the enclosing function */
#define FAILW(e)        return (ERR(e), 0)      /* ERR does SET(EOS) */
/* was the previous token of type t? */
#define LASTTYPE(t)     ((t) == v->lasttype)
48
/*
 * Lexical contexts: which sub-language the scanner is currently reading.
 * The current context is kept in v->lexcon.
 */
/* top-level pattern text */
#define L_ERE           1       /* mainline ERE/ARE */
#define L_BRE           2       /* mainline BRE */
#define L_Q             3       /* REG_QUOTE */
/* inside a repetition bound */
#define L_EBND          4       /* ERE/ARE bound */
#define L_BBND          5       /* BRE bound */
/* inside a bracket expression */
#define L_BRACK         6       /* brackets */
#define L_CEL           7       /* collating element */
#define L_ECL           8       /* equivalence class */
#define L_CCL           9       /* character class */
/* test for, or switch to, a context */
#define INCON(con)      ((con) == v->lexcon)
#define INTOCON(c)      (v->lexcon = (c))
61
/* address one element beyond the last chr of a genuine chr array */
#define ENDOF(array)    (&(array)[sizeof(array) / sizeof(chr)])
64
65 /*
66 - lexstart - set up lexical stuff, scan leading options
67 ^ static VOID lexstart(struct vars *);
68 */
69 static VOID
70 lexstart(v)
71 struct vars *v;
72 {
73 prefixes(v); /* may turn on new type bits etc. */
74 NOERR();
75
76 if (v->cflags&REG_QUOTE) {
77 assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)));
78 INTOCON(L_Q);
79 } else if (v->cflags&REG_EXTENDED) {
80 assert(!(v->cflags&REG_QUOTE));
81 INTOCON(L_ERE);
82 } else {
83 assert(!(v->cflags&(REG_QUOTE|REG_ADVF)));
84 INTOCON(L_BRE);
85 }
86
87 v->nexttype = EMPTY; /* remember we were at the start */
88 next(v); /* set up the first token */
89 }
90
91 /*
92 - prefixes - implement various special prefixes
93 ^ static VOID prefixes(struct vars *);
94 */
95 static VOID
96 prefixes(v)
97 struct vars *v;
98 {
99 /* literal string doesn't get any of this stuff */
100 if (v->cflags&REG_QUOTE)
101 return;
102
103 /* initial "***" gets special things */
104 if (HAVE(4) && NEXT3('*', '*', '*'))
105 switch (*(v->now + 3)) {
106 case CHR('?'): /* "***?" error, msg shows version */
107 ERR(REG_BADPAT);
108 return; /* proceed no further */
109 break;
110 case CHR('='): /* "***=" shifts to literal string */
111 NOTE(REG_UNONPOSIX);
112 v->cflags |= REG_QUOTE;
113 v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE);
114 v->now += 4;
115 return; /* and there can be no more prefixes */
116 break;
117 case CHR(':'): /* "***:" shifts to AREs */
118 NOTE(REG_UNONPOSIX);
119 v->cflags |= REG_ADVANCED;
120 v->now += 4;
121 break;
122 default: /* otherwise *** is just an error */
123 ERR(REG_BADRPT);
124 return;
125 break;
126 }
127
128 /* BREs and EREs don't get embedded options */
129 if ((v->cflags&REG_ADVANCED) != REG_ADVANCED)
130 return;
131
132 /* embedded options (AREs only) */
133 if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) {
134 NOTE(REG_UNONPOSIX);
135 v->now += 2;
136 for (; !ATEOS() && iscalpha(*v->now); v->now++)
137 switch (*v->now) {
138 case CHR('b'): /* BREs (but why???) */
139 v->cflags &= ~(REG_ADVANCED|REG_QUOTE);
140 break;
141 case CHR('c'): /* case sensitive */
142 v->cflags &= ~REG_ICASE;
143 break;
144 case CHR('e'): /* plain EREs */
145 v->cflags |= REG_EXTENDED;
146 v->cflags &= ~(REG_ADVF|REG_QUOTE);
147 break;
148 case CHR('i'): /* case insensitive */
149 v->cflags |= REG_ICASE;
150 break;
151 case CHR('m'): /* Perloid synonym for n */
152 case CHR('n'): /* \n affects ^ $ . [^ */
153 v->cflags |= REG_NEWLINE;
154 break;
155 case CHR('p'): /* ~Perl, \n affects . [^ */
156 v->cflags |= REG_NLSTOP;
157 v->cflags &= ~REG_NLANCH;
158 break;
159 case CHR('q'): /* literal string */
160 v->cflags |= REG_QUOTE;
161 v->cflags &= ~REG_ADVANCED;
162 break;
163 case CHR('s'): /* single line, \n ordinary */
164 v->cflags &= ~REG_NEWLINE;
165 break;
166 case CHR('t'): /* tight syntax */
167 v->cflags &= ~REG_EXPANDED;
168 break;
169 case CHR('w'): /* weird, \n affects ^ $ only */
170 v->cflags &= ~REG_NLSTOP;
171 v->cflags |= REG_NLANCH;
172 break;
173 case CHR('x'): /* expanded syntax */
174 v->cflags |= REG_EXPANDED;
175 break;
176 default:
177 ERR(REG_BADOPT);
178 return;
179 }
180 if (!NEXT1(')')) {
181 ERR(REG_BADOPT);
182 return;
183 }
184 v->now++;
185 if (v->cflags&REG_QUOTE)
186 v->cflags &= ~(REG_EXPANDED|REG_NEWLINE);
187 }
188 }
189
190 /*
191 - lexnest - "call a subroutine", interpolating string at the lexical level
192 * Note, this is not a very general facility. There are a number of
193 * implicit assumptions about what sorts of strings can be subroutines.
194 ^ static VOID lexnest(struct vars *, chr *, chr *);
195 */
196 static VOID
197 lexnest(v, beginp, endp)
198 struct vars *v;
199 chr *beginp; /* start of interpolation */
200 chr *endp; /* one past end of interpolation */
201 {
202 assert(v->savenow == NULL); /* only one level of nesting */
203 v->savenow = v->now;
204 v->savestop = v->stop;
205 v->now = beginp;
206 v->stop = endp;
207 }
208
209 /*
210 * string constants to interpolate as expansions of things like \d
211 */
212 static chr backd[] = { /* \d */
213 CHR('['), CHR('['), CHR(':'),
214 CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
215 CHR(':'), CHR(']'), CHR(']')
216 };
217 static chr backD[] = { /* \D */
218 CHR('['), CHR('^'), CHR('['), CHR(':'),
219 CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
220 CHR(':'), CHR(']'), CHR(']')
221 };
222 static chr brbackd[] = { /* \d within brackets */
223 CHR('['), CHR(':'),
224 CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
225 CHR(':'), CHR(']')
226 };
227 static chr backs[] = { /* \s */
228 CHR('['), CHR('['), CHR(':'),
229 CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
230 CHR(':'), CHR(']'), CHR(']')
231 };
232 static chr backS[] = { /* \S */
233 CHR('['), CHR('^'), CHR('['), CHR(':'),
234 CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
235 CHR(':'), CHR(']'), CHR(']')
236 };
237 static chr brbacks[] = { /* \s within brackets */
238 CHR('['), CHR(':'),
239 CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
240 CHR(':'), CHR(']')
241 };
242 static chr backw[] = { /* \w */
243 CHR('['), CHR('['), CHR(':'),
244 CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
245 CHR(':'), CHR(']'), CHR('_'), CHR(']')
246 };
247 static chr backW[] = { /* \W */
248 CHR('['), CHR('^'), CHR('['), CHR(':'),
249 CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
250 CHR(':'), CHR(']'), CHR('_'), CHR(']')
251 };
252 static chr brbackw[] = { /* \w within brackets */
253 CHR('['), CHR(':'),
254 CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
255 CHR(':'), CHR(']'), CHR('_')
256 };
257
258 /*
259 - lexword - interpolate a bracket expression for word characters
260 * Possibly ought to inquire whether there is a "word" character class.
261 ^ static VOID lexword(struct vars *);
262 */
263 static VOID
264 lexword(v)
265 struct vars *v;
266 {
267 lexnest(v, backw, ENDOF(backw));
268 }
269
270 /*
271 - next - get next token
272 ^ static int next(struct vars *);
273 */
274 static int /* 1 normal, 0 failure */
275 next(v)
276 struct vars *v;
277 {
278 chr c;
279
280 /* errors yield an infinite sequence of failures */
281 if (ISERR())
282 return 0; /* the error has set nexttype to EOS */
283
284 /* remember flavor of last token */
285 v->lasttype = v->nexttype;
286
287 /* REG_BOSONLY */
288 if (v->nexttype == EMPTY && (v->cflags&REG_BOSONLY)) {
289 /* at start of a REG_BOSONLY RE */
290 RETV(SBEGIN, 0); /* same as \A */
291 }
292
293 /* if we're nested and we've hit end, return to outer level */
294 if (v->savenow != NULL && ATEOS()) {
295 v->now = v->savenow;
296 v->stop = v->savestop;
297 v->savenow = v->savestop = NULL;
298 }
299
300 /* skip white space etc. if appropriate (not in literal or []) */
301 if (v->cflags&REG_EXPANDED)
302 switch (v->lexcon) {
303 case L_ERE:
304 case L_BRE:
305 case L_EBND:
306 case L_BBND:
307 skip(v);
308 break;
309 }
310
311 /* handle EOS, depending on context */
312 if (ATEOS()) {
313 switch (v->lexcon) {
314 case L_ERE:
315 case L_BRE:
316 case L_Q:
317 RET(EOS);
318 break;
319 case L_EBND:
320 case L_BBND:
321 FAILW(REG_EBRACE);
322 break;
323 case L_BRACK:
324 case L_CEL:
325 case L_ECL:
326 case L_CCL:
327 FAILW(REG_EBRACK);
328 break;
329 }
330 assert(NOTREACHED);
331 }
332
333 /* okay, time to actually get a character */
334 c = *v->now++;
335
336 /* deal with the easy contexts, punt EREs to code below */
337 switch (v->lexcon) {
338 case L_BRE: /* punt BREs to separate function */
339 return brenext(v, c);
340 break;
341 case L_ERE: /* see below */
342 break;
343 case L_Q: /* literal strings are easy */
344 RETV(PLAIN, c);
345 break;
346 case L_BBND: /* bounds are fairly simple */
347 case L_EBND:
348 switch (c) {
349 case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
350 case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
351 case CHR('8'): case CHR('9'):
352 RETV(DIGIT, (chr)DIGITVAL(c));
353 break;
354 case CHR(','):
355 RET(',');
356 break;
357 case CHR('}'): /* ERE bound ends with } */
358 if (INCON(L_EBND)) {
359 INTOCON(L_ERE);
360 if ((v->cflags&REG_ADVF) && NEXT1('?')) {
361 v->now++;
362 NOTE(REG_UNONPOSIX);
363 RETV('}', 0);
364 }
365 RETV('}', 1);
366 } else
367 FAILW(REG_BADBR);
368 break;
369 case CHR('\\'): /* BRE bound ends with \} */
370 if (INCON(L_BBND) && NEXT1('}')) {
371 v->now++;
372 INTOCON(L_BRE);
373 RET('}');
374 } else
375 FAILW(REG_BADBR);
376 break;
377 default:
378 FAILW(REG_BADBR);
379 break;
380 }
381 assert(NOTREACHED);
382 break;
383 case L_BRACK: /* brackets are not too hard */
384 switch (c) {
385 case CHR(']'):
386 if (LASTTYPE('['))
387 RETV(PLAIN, c);
388 else {
389 INTOCON((v->cflags&REG_EXTENDED) ?
390 L_ERE : L_BRE);
391 RET(']');
392 }
393 break;
394 case CHR('\\'):
395 NOTE(REG_UBBS);
396 if (!(v->cflags&REG_ADVF))
397 RETV(PLAIN, c);
398 NOTE(REG_UNONPOSIX);
399 if (ATEOS())
400 FAILW(REG_EESCAPE);
401 (DISCARD)lexescape(v);
402 switch (v->nexttype) { /* not all escapes okay here */
403 case PLAIN:
404 return 1;
405 break;
406 case CCLASS:
407 switch (v->nextvalue) {
408 case 'd':
409 lexnest(v, brbackd, ENDOF(brbackd));
410 break;
411 case 's':
412 lexnest(v, brbacks, ENDOF(brbacks));
413 break;
414 case 'w':
415 lexnest(v, brbackw, ENDOF(brbackw));
416 break;
417 default:
418 FAILW(REG_EESCAPE);
419 break;
420 }
421 /* lexnest done, back up and try again */
422 v->nexttype = v->lasttype;
423 return next(v);
424 break;
425 }
426 /* not one of the acceptable escapes */
427 FAILW(REG_EESCAPE);
428 break;
429 case CHR('-'):
430 if (LASTTYPE('[') || NEXT1(']'))
431 RETV(PLAIN, c);
432 else
433 RETV(RANGE, c);
434 break;
435 case CHR('['):
436 if (ATEOS())
437 FAILW(REG_EBRACK);
438 switch (*v->now++) {
439 case CHR('.'):
440 INTOCON(L_CEL);
441 /* might or might not be locale-specific */
442 RET(COLLEL);
443 break;
444 case CHR('='):
445 INTOCON(L_ECL);
446 NOTE(REG_ULOCALE);
447 RET(ECLASS);
448 break;
449 case CHR(':'):
450 INTOCON(L_CCL);
451 NOTE(REG_ULOCALE);
452 RET(CCLASS);
453 break;
454 default: /* oops */
455 v->now--;
456 RETV(PLAIN, c);
457 break;
458 }
459 assert(NOTREACHED);
460 break;
461 default:
462 RETV(PLAIN, c);
463 break;
464 }
465 assert(NOTREACHED);
466 break;
467 case L_CEL: /* collating elements are easy */
468 if (c == CHR('.') && NEXT1(']')) {
469 v->now++;
470 INTOCON(L_BRACK);
471 RETV(END, '.');
472 } else
473 RETV(PLAIN, c);
474 break;
475 case L_ECL: /* ditto equivalence classes */
476 if (c == CHR('=') && NEXT1(']')) {
477 v->now++;
478 INTOCON(L_BRACK);
479 RETV(END, '=');
480 } else
481 RETV(PLAIN, c);
482 break;
483 case L_CCL: /* ditto character classes */
484 if (c == CHR(':') && NEXT1(']')) {
485 v->now++;
486 INTOCON(L_BRACK);
487 RETV(END, ':');
488 } else
489 RETV(PLAIN, c);
490 break;
491 default:
492 assert(NOTREACHED);
493 break;
494 }
495
496 /* that got rid of everything except EREs and AREs */
497 assert(INCON(L_ERE));
498
499 /* deal with EREs and AREs, except for backslashes */
500 switch (c) {
501 case CHR('|'):
502 RET('|');
503 break;
504 case CHR('*'):
505 if ((v->cflags&REG_ADVF) && NEXT1('?')) {
506 v->now++;
507 NOTE(REG_UNONPOSIX);
508 RETV('*', 0);
509 }
510 RETV('*', 1);
511 break;
512 case CHR('+'):
513 if ((v->cflags&REG_ADVF) && NEXT1('?')) {
514 v->now++;
515 NOTE(REG_UNONPOSIX);
516 RETV('+', 0);
517 }
518 RETV('+', 1);
519 break;
520 case CHR('?'):
521 if ((v->cflags&REG_ADVF) && NEXT1('?')) {
522 v->now++;
523 NOTE(REG_UNONPOSIX);
524 RETV('?', 0);
525 }
526 RETV('?', 1);
527 break;
528 case CHR('{'): /* bounds start or plain character */
529 if (v->cflags&REG_EXPANDED)
530 skip(v);
531 if (ATEOS() || !iscdigit(*v->now)) {
532 NOTE(REG_UBRACES);
533 NOTE(REG_UUNSPEC);
534 RETV(PLAIN, c);
535 } else {
536 NOTE(REG_UBOUNDS);
537 INTOCON(L_EBND);
538 RET('{');
539 }
540 assert(NOTREACHED);
541 break;
542 case CHR('('): /* parenthesis, or advanced extension */
543 if ((v->cflags&REG_ADVF) && NEXT1('?')) {
544 NOTE(REG_UNONPOSIX);
545 v->now++;
546 switch (*v->now++) {
547 case CHR(':'): /* non-capturing paren */
548 RETV('(', 0);
549 break;
550 case CHR('#'): /* comment */
551 while (!ATEOS() && *v->now != CHR(')'))
552 v->now++;
553 if (!ATEOS())
554 v->now++;
555 assert(v->nexttype == v->lasttype);
556 return next(v);
557 break;
558 case CHR('='): /* positive lookahead */
559 NOTE(REG_ULOOKAHEAD);
560 RETV(LACON, 1);
561 break;
562 case CHR('!'): /* negative lookahead */
563 NOTE(REG_ULOOKAHEAD);
564 RETV(LACON, 0);
565 break;
566 default:
567 FAILW(REG_BADRPT);
568 break;
569 }
570 assert(NOTREACHED);
571 }
572 if (v->cflags&REG_NOSUB)
573 RETV('(', 0); /* all parens non-capturing */
574 else
575 RETV('(', 1);
576 break;
577 case CHR(')'):
578 if (LASTTYPE('(')) {
579 NOTE(REG_UUNSPEC);
580 }
581 RETV(')', c);
582 break;
583 case CHR('['): /* easy except for [[:<:]] and [[:>:]] */
584 if (HAVE(6) && *(v->now+0) == CHR('[') &&
585 *(v->now+1) == CHR(':') &&
586 (*(v->now+2) == CHR('<') ||
587 *(v->now+2) == CHR('>')) &&
588 *(v->now+3) == CHR(':') &&
589 *(v->now+4) == CHR(']') &&
590 *(v->now+5) == CHR(']')) {
591 c = *(v->now+2);
592 v->now += 6;
593 NOTE(REG_UNONPOSIX);
594 RET((c == CHR('<')) ? '<' : '>');
595 }
596 INTOCON(L_BRACK);
597 if (NEXT1('^')) {
598 v->now++;
599 RETV('[', 0);
600 }
601 RETV('[', 1);
602 break;
603 case CHR('.'):
604 RET('.');
605 break;
606 case CHR('^'):
607 RET('^');
608 break;
609 case CHR('$'):
610 RET('$');
611 break;
612 case CHR('\\'): /* mostly punt backslashes to code below */
613 if (ATEOS())
614 FAILW(REG_EESCAPE);
615 break;
616 default: /* ordinary character */
617 RETV(PLAIN, c);
618 break;
619 }
620
621 /* ERE/ARE backslash handling; backslash already eaten */
622 assert(!ATEOS());
623 if (!(v->cflags&REG_ADVF)) { /* only AREs have non-trivial escapes */
624 if (iscalnum(*v->now)) {
625 NOTE(REG_UBSALNUM);
626 NOTE(REG_UUNSPEC);
627 }
628 RETV(PLAIN, *v->now++);
629 }
630 (DISCARD)lexescape(v);
631 if (ISERR())
632 FAILW(REG_EESCAPE);
633 if (v->nexttype == CCLASS) { /* fudge at lexical level */
634 switch (v->nextvalue) {
635 case 'd': lexnest(v, backd, ENDOF(backd)); break;
636 case 'D': lexnest(v, backD, ENDOF(backD)); break;
637 case 's': lexnest(v, backs, ENDOF(backs)); break;
638 case 'S': lexnest(v, backS, ENDOF(backS)); break;
639 case 'w': lexnest(v, backw, ENDOF(backw)); break;
640 case 'W': lexnest(v, backW, ENDOF(backW)); break;
641 default:
642 assert(NOTREACHED);
643 FAILW(REG_ASSERT);
644 break;
645 }
646 /* lexnest done, back up and try again */
647 v->nexttype = v->lasttype;
648 return next(v);
649 }
650 /* otherwise, lexescape has already done the work */
651 return !ISERR();
652 }
653
654 /*
655 - lexescape - parse an ARE backslash escape (backslash already eaten)
656 * Note slightly nonstandard use of the CCLASS type code.
657 ^ static int lexescape(struct vars *);
658 */
659 static int /* not actually used, but convenient for RETV */
660 lexescape(v)
661 struct vars *v;
662 {
663 chr c;
664 static chr alert[] = {
665 CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
666 };
667 static chr esc[] = {
668 CHR('E'), CHR('S'), CHR('C')
669 };
670 chr *save;
671
672 assert(v->cflags&REG_ADVF);
673
674 assert(!ATEOS());
675 c = *v->now++;
676 if (!iscalnum(c))
677 RETV(PLAIN, c);
678
679 NOTE(REG_UNONPOSIX);
680 switch (c) {
681 case CHR('a'):
682 RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
683 break;
684 case CHR('A'):
685 RETV(SBEGIN, 0);
686 break;
687 case CHR('b'):
688 RETV(PLAIN, CHR('\b'));
689 break;
690 case CHR('B'):
691 RETV(PLAIN, CHR('\\'));
692 break;
693 case CHR('c'):
694 NOTE(REG_UUNPORT);
695 if (ATEOS())
696 FAILW(REG_EESCAPE);
697 RETV(PLAIN, (chr)(*v->now++ & 037));
698 break;
699 case CHR('d'):
700 NOTE(REG_ULOCALE);
701 RETV(CCLASS, 'd');
702 break;
703 case CHR('D'):
704 NOTE(REG_ULOCALE);
705 RETV(CCLASS, 'D');
706 break;
707 case CHR('e'):
708 NOTE(REG_UUNPORT);
709 RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
710 break;
711 case CHR('f'):
712 RETV(PLAIN, CHR('\f'));
713 break;
714 case CHR('m'):
715 RET('<');
716 break;
717 case CHR('M'):
718 RET('>');
719 break;
720 case CHR('n'):
721 RETV(PLAIN, CHR('\n'));
722 break;
723 case CHR('r'):
724 RETV(PLAIN, CHR('\r'));
725 break;
726 case CHR('s'):
727 NOTE(REG_ULOCALE);
728 RETV(CCLASS, 's');
729 break;
730 case CHR('S'):
731 NOTE(REG_ULOCALE);
732 RETV(CCLASS, 'S');
733 break;
734 case CHR('t'):
735 RETV(PLAIN, CHR('\t'));
736 break;
737 case CHR('u'):
738 c = lexdigits(v, 16, 4, 4);
739 if (ISERR())
740 FAILW(REG_EESCAPE);
741 RETV(PLAIN, c);
742 break;
743 case CHR('U'):
744 c = lexdigits(v, 16, 8, 8);
745 if (ISERR())
746 FAILW(REG_EESCAPE);
747 RETV(PLAIN, c);
748 break;
749 case CHR('v'):
750 RETV(PLAIN, CHR('\v'));
751 break;
752 case CHR('w'):
753 NOTE(REG_ULOCALE);
754 RETV(CCLASS, 'w');
755 break;
756 case CHR('W'):
757 NOTE(REG_ULOCALE);
758 RETV(CCLASS, 'W');
759 break;
760 case CHR('x'):
761 NOTE(REG_UUNPORT);
762 c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */
763 if (ISERR())
764 FAILW(REG_EESCAPE);
765 RETV(PLAIN, c);
766 break;
767 case CHR('y'):
768 NOTE(REG_ULOCALE);
769 RETV(WBDRY, 0);
770 break;
771 case CHR('Y'):
772 NOTE(REG_ULOCALE);
773 RETV(NWBDRY, 0);
774 break;
775 case CHR('Z'):
776 RETV(SEND, 0);
777 break;
778 case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
779 case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
780 case CHR('9'):
781 save = v->now;
782 v->now--; /* put first digit back */
783 c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */
784 if (ISERR())
785 FAILW(REG_EESCAPE);
786 /* ugly heuristic (first test is "exactly 1 digit?") */
787 if (v->now - save == 0 || (int)c <= v->nsubexp) {
788 NOTE(REG_UBACKREF);
789 RETV(BACKREF, (chr)c);
790 }
791 /* oops, doesn't look like it's a backref after all... */
792 v->now = save;
793 /* and fall through into octal number */
794 case CHR('0'):
795 NOTE(REG_UUNPORT);
796 v->now--; /* put first digit back */
797 c = lexdigits(v, 8, 1, 3);
798 if (ISERR())
799 FAILW(REG_EESCAPE);
800 RETV(PLAIN, c);
801 break;
802 default:
803 assert(iscalpha(c));
804 FAILW(REG_EESCAPE); /* unknown alphabetic escape */
805 break;
806 }
807 assert(NOTREACHED);
808 }
809
810 /*
811 - lexdigits - slurp up digits and return chr value
812 ^ static chr lexdigits(struct vars *, int, int, int);
813 */
814 static chr /* chr value; errors signalled via ERR */
815 lexdigits(v, base, minlen, maxlen)
816 struct vars *v;
817 int base;
818 int minlen;
819 int maxlen;
820 {
821 uchr n; /* unsigned to avoid overflow misbehavior */
822 int len;
823 chr c;
824 int d;
825 CONST uchr ub = (uchr) base;
826
827 n = 0;
828 for (len = 0; len < maxlen && !ATEOS(); len++) {
829 c = *v->now++;
830 switch (c) {
831 case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
832 case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
833 case CHR('8'): case CHR('9'):
834 d = DIGITVAL(c);
835 break;
836 case CHR('a'): case CHR('A'): d = 10; break;
837 case CHR('b'): case CHR('B'): d = 11; break;
838 case CHR('c'): case CHR('C'): d = 12; break;
839 case CHR('d'): case CHR('D'): d = 13; break;
840 case CHR('e'): case CHR('E'): d = 14; break;
841 case CHR('f'): case CHR('F'): d = 15; break;
842 default:
843 v->now--; /* oops, not a digit at all */
844 d = -1;
845 break;
846 }
847
848 if (d >= base) { /* not a plausible digit */
849 v->now--;
850 d = -1;
851 }
852 if (d < 0)
853 break; /* NOTE BREAK OUT */
854 n = n*ub + (uchr)d;
855 }
856 if (len < minlen)
857 ERR(REG_EESCAPE);
858
859 return (chr)n;
860 }
861
862 /*
863 - brenext - get next BRE token
864 * This is much like EREs except for all the stupid backslashes and the
865 * context-dependency of some things.
866 ^ static int brenext(struct vars *, pchr);
867 */
868 static int /* 1 normal, 0 failure */
869 brenext(v, pc)
870 struct vars *v;
871 pchr pc;
872 {
873 chr c = (chr)pc;
874
875 switch (c) {
876 case CHR('*'):
877 if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
878 RETV(PLAIN, c);
879 RET('*');
880 break;
881 case CHR('['):
882 if (HAVE(6) && *(v->now+0) == CHR('[') &&
883 *(v->now+1) == CHR(':') &&
884 (*(v->now+2) == CHR('<') ||
885 *(v->now+2) == CHR('>')) &&
886 *(v->now+3) == CHR(':') &&
887 *(v->now+4) == CHR(']') &&
888 *(v->now+5) == CHR(']')) {
889 c = *(v->now+2);
890 v->now += 6;
891 NOTE(REG_UNONPOSIX);
892 RET((c == CHR('<')) ? '<' : '>');
893 }
894 INTOCON(L_BRACK);
895 if (NEXT1('^')) {
896 v->now++;
897 RETV('[', 0);
898 }
899 RETV('[', 1);
900 break;
901 case CHR('.'):
902 RET('.');
903 break;
904 case CHR('^'):
905 if (LASTTYPE(EMPTY))
906 RET('^');
907 if (LASTTYPE('(')) {
908 NOTE(REG_UUNSPEC);
909 RET('^');
910 }
911 RETV(PLAIN, c);
912 break;
913 case CHR('$'):
914 if (v->cflags&REG_EXPANDED)
915 skip(v);
916 if (ATEOS())
917 RET('$');
918 if (NEXT2('\\', ')')) {
919 NOTE(REG_UUNSPEC);
920 RET('$');
921 }
922 RETV(PLAIN, c);
923 break;
924 case CHR('\\'):
925 break; /* see below */
926 default:
927 RETV(PLAIN, c);
928 break;
929 }
930
931 assert(c == CHR('\\'));
932
933 if (ATEOS())
934 FAILW(REG_EESCAPE);
935
936 c = *v->now++;
937 switch (c) {
938 case CHR('{'):
939 INTOCON(L_BBND);
940 NOTE(REG_UBOUNDS);
941 RET('{');
942 break;
943 case CHR('('):
944 RETV('(', 1);
945 break;
946 case CHR(')'):
947 RETV(')', c);
948 break;
949 case CHR('<'):
950 NOTE(REG_UNONPOSIX);
951 RET('<');
952 break;
953 case CHR('>'):
954 NOTE(REG_UNONPOSIX);
955 RET('>');
956 break;
957 case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
958 case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
959 case CHR('9'):
960 NOTE(REG_UBACKREF);
961 RETV(BACKREF, (chr)DIGITVAL(c));
962 break;
963 default:
964 if (iscalnum(c)) {
965 NOTE(REG_UBSALNUM);
966 NOTE(REG_UUNSPEC);
967 }
968 RETV(PLAIN, c);
969 break;
970 }
971
972 assert(NOTREACHED);
973 }
974
975 /*
976 - skip - skip white space and comments in expanded form
977 ^ static VOID skip(struct vars *);
978 */
979 static VOID
980 skip(v)
981 struct vars *v;
982 {
983 chr *start = v->now;
984
985 assert(v->cflags&REG_EXPANDED);
986
987 for (;;) {
988 while (!ATEOS() && iscspace(*v->now))
989 v->now++;
990 if (ATEOS() || *v->now != CHR('#'))
991 break; /* NOTE BREAK OUT */
992 assert(NEXT1('#'));
993 while (!ATEOS() && *v->now != CHR('\n'))
994 v->now++;
995 /* leave the newline to be picked up by the iscspace loop */
996 }
997
998 if (v->now != start)
999 NOTE(REG_UNONPOSIX);
1000 }
1001
1002 /*
1003 - newline - return the chr for a newline
1004 * This helps confine use of CHR to this source file.
1005 ^ static chr newline(NOPARMS);
1006 */
1007 static chr
1008 newline()
1009 {
1010 return CHR('\n');
1011 }
1012
/*
 - ch - return the chr sequence for regc_locale.c's fake collating element ch
 * This helps confine use of CHR to this source file.  Beware that the caller
 * knows how long the sequence is.
 * Compiled only when REG_DEBUG is defined.  The returned storage is static.
 ^ #ifdef REG_DEBUG
 ^ static chr *ch(NOPARMS);
 ^ #endif
 */
#ifdef REG_DEBUG
static chr *
ch()
{
    static chr sequence[3] = {
        CHR('c'), CHR('h'), CHR('\0')
    };

    return sequence;
}
#endif
1030
1031 /*
1032 - chrnamed - return the chr known by a given (chr string) name
1033 * The code is a bit clumsy, but this routine gets only such specialized
1034 * use that it hardly matters.
1035 ^ static chr chrnamed(struct vars *, chr *, chr *, pchr);
1036 */
1037 static chr
1038 chrnamed(v, startp, endp, lastresort)
1039 struct vars *v;
1040 chr *startp; /* start of name */
1041 chr *endp; /* just past end of name */
1042 pchr lastresort; /* what to return if name lookup fails */
1043 {
1044 celt c;
1045 int errsave;
1046 int e;
1047 struct cvec *cv;
1048
1049 errsave = v->err;
1050 v->err = 0;
1051 c = element(v, startp, endp);
1052 e = v->err;
1053 v->err = errsave;
1054
1055 if (e != 0)
1056 return (chr)lastresort;
1057
1058 cv = range(v, c, c, 0);
1059 if (cv->nchrs == 0)
1060 return (chr)lastresort;
1061 return cv->chrs[0];
1062 }
1063
1064 /* End of regc_lex.c */

Properties

Name Value
svn:eol-style native
svn:keywords Header

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25