/[dtapublic]/projs/ets/trunk/src/c_tcl_base_7_5_w_mods/regc_lex.c
ViewVC logotype

Annotation of /projs/ets/trunk/src/c_tcl_base_7_5_w_mods/regc_lex.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 98 - (hide annotations) (download)
Sun Dec 18 00:57:31 2016 UTC (7 years, 6 months ago) by dashley
Original Path: projs/dtats/trunk/shared_source/c_tcl_base_7_5_w_mods/regc_lex.c
File MIME type: text/plain
File size: 24535 byte(s)
Reorganization.
1 dashley 71 /* $Header$ */
2     /*
3     * lexical analyzer
4     * This file is #included by regcomp.c.
5     *
6     * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
7     *
8     * Development of this software was funded, in part, by Cray Research Inc.,
9     * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
10     * Corporation, none of whom are responsible for the results. The author
11     * thanks all of them.
12     *
13     * Redistribution and use in source and binary forms -- with or without
14     * modification -- are permitted for any purpose, provided that
15     * redistributions in source form retain this entire copyright notice and
16     * indicate the origin and nature of any modifications.
17     *
18     * I'd appreciate being given credit for this package in the documentation
19     * of software which uses it, but that is not a requirement.
20     *
21     * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
22     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
23     * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
24     * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25     * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26     * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
27     * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28     * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29     * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
30     * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31     *
32     */
33    
/*
 * Scanning macros.  Every one of them expects a variable named v (the
 * current struct vars *) to be in scope at the point of use.
 */
#define	ATEOS()		(v->stop <= v->now)		/* input used up? */
#define	HAVE(n)		((n) <= v->stop - v->now)	/* >= n chrs left? */
#define	NEXT1(c)	(!ATEOS() && v->now[0] == CHR(c))
#define	NEXT2(a,b)	(HAVE(2) && v->now[0] == CHR(a) && v->now[1] == CHR(b))
#define	NEXT3(a,b,c)	(HAVE(3) && v->now[0] == CHR(a) && \
				v->now[1] == CHR(b) && \
				v->now[2] == CHR(c))
#define	SET(c)		(v->nexttype = (c))		/* token type only */
#define	SETV(c, n)	(v->nexttype = (c), v->nextvalue = (n))	/* type + value */
#define	RET(c)		return (SET(c), 1)		/* deliver token, report success */
#define	RETV(c, n)	return (SETV(c, n), 1)
#define	FAILW(e)	return (ERR(e), 0)		/* ERR does SET(EOS) */
#define	LASTTYPE(t)	((t) == v->lasttype)		/* previous token was t? */

/*
 * Lexical contexts: the value of v->lexcon selects which rule set the
 * lexer applies to the next chr.
 */
#define	L_ERE	1	/* mainline ERE/ARE */
#define	L_BRE	2	/* mainline BRE */
#define	L_Q	3	/* REG_QUOTE */
#define	L_EBND	4	/* ERE/ARE bound */
#define	L_BBND	5	/* BRE bound */
#define	L_BRACK	6	/* brackets */
#define	L_CEL	7	/* collating element */
#define	L_ECL	8	/* equivalence class */
#define	L_CCL	9	/* character class */
#define	INTOCON(c)	(v->lexcon = (c))		/* switch context */
#define	INCON(con)	((con) == v->lexcon)		/* test context */

/* pointer just past the last element of a true array (not a pointer) */
#define	ENDOF(array)	((array) + sizeof(array)/sizeof((array)[0]))
64    
65     /*
66     - lexstart - set up lexical stuff, scan leading options
67     ^ static VOID lexstart(struct vars *);
68     */
69     static VOID
70     lexstart(v)
71     struct vars *v;
72     {
73     prefixes(v); /* may turn on new type bits etc. */
74     NOERR();
75    
76     if (v->cflags&REG_QUOTE) {
77     assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)));
78     INTOCON(L_Q);
79     } else if (v->cflags&REG_EXTENDED) {
80     assert(!(v->cflags&REG_QUOTE));
81     INTOCON(L_ERE);
82     } else {
83     assert(!(v->cflags&(REG_QUOTE|REG_ADVF)));
84     INTOCON(L_BRE);
85     }
86    
87     v->nexttype = EMPTY; /* remember we were at the start */
88     next(v); /* set up the first token */
89     }
90    
91     /*
92     - prefixes - implement various special prefixes
93     ^ static VOID prefixes(struct vars *);
94     */
95     static VOID
96     prefixes(v)
97     struct vars *v;
98     {
99     /* literal string doesn't get any of this stuff */
100     if (v->cflags&REG_QUOTE)
101     return;
102    
103     /* initial "***" gets special things */
104     if (HAVE(4) && NEXT3('*', '*', '*'))
105     switch (*(v->now + 3)) {
106     case CHR('?'): /* "***?" error, msg shows version */
107     ERR(REG_BADPAT);
108     return; /* proceed no further */
109     break;
110     case CHR('='): /* "***=" shifts to literal string */
111     NOTE(REG_UNONPOSIX);
112     v->cflags |= REG_QUOTE;
113     v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE);
114     v->now += 4;
115     return; /* and there can be no more prefixes */
116     break;
117     case CHR(':'): /* "***:" shifts to AREs */
118     NOTE(REG_UNONPOSIX);
119     v->cflags |= REG_ADVANCED;
120     v->now += 4;
121     break;
122     default: /* otherwise *** is just an error */
123     ERR(REG_BADRPT);
124     return;
125     break;
126     }
127    
128     /* BREs and EREs don't get embedded options */
129     if ((v->cflags&REG_ADVANCED) != REG_ADVANCED)
130     return;
131    
132     /* embedded options (AREs only) */
133     if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) {
134     NOTE(REG_UNONPOSIX);
135     v->now += 2;
136     for (; !ATEOS() && iscalpha(*v->now); v->now++)
137     switch (*v->now) {
138     case CHR('b'): /* BREs (but why???) */
139     v->cflags &= ~(REG_ADVANCED|REG_QUOTE);
140     break;
141     case CHR('c'): /* case sensitive */
142     v->cflags &= ~REG_ICASE;
143     break;
144     case CHR('e'): /* plain EREs */
145     v->cflags |= REG_EXTENDED;
146     v->cflags &= ~(REG_ADVF|REG_QUOTE);
147     break;
148     case CHR('i'): /* case insensitive */
149     v->cflags |= REG_ICASE;
150     break;
151     case CHR('m'): /* Perloid synonym for n */
152     case CHR('n'): /* \n affects ^ $ . [^ */
153     v->cflags |= REG_NEWLINE;
154     break;
155     case CHR('p'): /* ~Perl, \n affects . [^ */
156     v->cflags |= REG_NLSTOP;
157     v->cflags &= ~REG_NLANCH;
158     break;
159     case CHR('q'): /* literal string */
160     v->cflags |= REG_QUOTE;
161     v->cflags &= ~REG_ADVANCED;
162     break;
163     case CHR('s'): /* single line, \n ordinary */
164     v->cflags &= ~REG_NEWLINE;
165     break;
166     case CHR('t'): /* tight syntax */
167     v->cflags &= ~REG_EXPANDED;
168     break;
169     case CHR('w'): /* weird, \n affects ^ $ only */
170     v->cflags &= ~REG_NLSTOP;
171     v->cflags |= REG_NLANCH;
172     break;
173     case CHR('x'): /* expanded syntax */
174     v->cflags |= REG_EXPANDED;
175     break;
176     default:
177     ERR(REG_BADOPT);
178     return;
179     }
180     if (!NEXT1(')')) {
181     ERR(REG_BADOPT);
182     return;
183     }
184     v->now++;
185     if (v->cflags&REG_QUOTE)
186     v->cflags &= ~(REG_EXPANDED|REG_NEWLINE);
187     }
188     }
189    
190     /*
191     - lexnest - "call a subroutine", interpolating string at the lexical level
192     * Note, this is not a very general facility. There are a number of
193     * implicit assumptions about what sorts of strings can be subroutines.
194     ^ static VOID lexnest(struct vars *, chr *, chr *);
195     */
196     static VOID
197     lexnest(v, beginp, endp)
198     struct vars *v;
199     chr *beginp; /* start of interpolation */
200     chr *endp; /* one past end of interpolation */
201     {
202     assert(v->savenow == NULL); /* only one level of nesting */
203     v->savenow = v->now;
204     v->savestop = v->stop;
205     v->now = beginp;
206     v->stop = endp;
207     }
208    
209     /*
210     * string constants to interpolate as expansions of things like \d
211     */
212     static chr backd[] = { /* \d */
213     CHR('['), CHR('['), CHR(':'),
214     CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
215     CHR(':'), CHR(']'), CHR(']')
216     };
217     static chr backD[] = { /* \D */
218     CHR('['), CHR('^'), CHR('['), CHR(':'),
219     CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
220     CHR(':'), CHR(']'), CHR(']')
221     };
222     static chr brbackd[] = { /* \d within brackets */
223     CHR('['), CHR(':'),
224     CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
225     CHR(':'), CHR(']')
226     };
227     static chr backs[] = { /* \s */
228     CHR('['), CHR('['), CHR(':'),
229     CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
230     CHR(':'), CHR(']'), CHR(']')
231     };
232     static chr backS[] = { /* \S */
233     CHR('['), CHR('^'), CHR('['), CHR(':'),
234     CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
235     CHR(':'), CHR(']'), CHR(']')
236     };
237     static chr brbacks[] = { /* \s within brackets */
238     CHR('['), CHR(':'),
239     CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
240     CHR(':'), CHR(']')
241     };
242     static chr backw[] = { /* \w */
243     CHR('['), CHR('['), CHR(':'),
244     CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
245     CHR(':'), CHR(']'), CHR('_'), CHR(']')
246     };
247     static chr backW[] = { /* \W */
248     CHR('['), CHR('^'), CHR('['), CHR(':'),
249     CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
250     CHR(':'), CHR(']'), CHR('_'), CHR(']')
251     };
252     static chr brbackw[] = { /* \w within brackets */
253     CHR('['), CHR(':'),
254     CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
255     CHR(':'), CHR(']'), CHR('_')
256     };
257    
258     /*
259     - lexword - interpolate a bracket expression for word characters
260     * Possibly ought to inquire whether there is a "word" character class.
261     ^ static VOID lexword(struct vars *);
262     */
263     static VOID
264     lexword(v)
265     struct vars *v;
266     {
267     lexnest(v, backw, ENDOF(backw));
268     }
269    
270     /*
271     - next - get next token
272     ^ static int next(struct vars *);
273     */
274     static int /* 1 normal, 0 failure */
275     next(v)
276     struct vars *v;
277     {
278     chr c;
279    
280     /* errors yield an infinite sequence of failures */
281     if (ISERR())
282     return 0; /* the error has set nexttype to EOS */
283    
284     /* remember flavor of last token */
285     v->lasttype = v->nexttype;
286    
287     /* REG_BOSONLY */
288     if (v->nexttype == EMPTY && (v->cflags&REG_BOSONLY)) {
289     /* at start of a REG_BOSONLY RE */
290     RETV(SBEGIN, 0); /* same as \A */
291     }
292    
293     /* if we're nested and we've hit end, return to outer level */
294     if (v->savenow != NULL && ATEOS()) {
295     v->now = v->savenow;
296     v->stop = v->savestop;
297     v->savenow = v->savestop = NULL;
298     }
299    
300     /* skip white space etc. if appropriate (not in literal or []) */
301     if (v->cflags&REG_EXPANDED)
302     switch (v->lexcon) {
303     case L_ERE:
304     case L_BRE:
305     case L_EBND:
306     case L_BBND:
307     skip(v);
308     break;
309     }
310    
311     /* handle EOS, depending on context */
312     if (ATEOS()) {
313     switch (v->lexcon) {
314     case L_ERE:
315     case L_BRE:
316     case L_Q:
317     RET(EOS);
318     break;
319     case L_EBND:
320     case L_BBND:
321     FAILW(REG_EBRACE);
322     break;
323     case L_BRACK:
324     case L_CEL:
325     case L_ECL:
326     case L_CCL:
327     FAILW(REG_EBRACK);
328     break;
329     }
330     assert(NOTREACHED);
331     }
332    
333     /* okay, time to actually get a character */
334     c = *v->now++;
335    
336     /* deal with the easy contexts, punt EREs to code below */
337     switch (v->lexcon) {
338     case L_BRE: /* punt BREs to separate function */
339     return brenext(v, c);
340     break;
341     case L_ERE: /* see below */
342     break;
343     case L_Q: /* literal strings are easy */
344     RETV(PLAIN, c);
345     break;
346     case L_BBND: /* bounds are fairly simple */
347     case L_EBND:
348     switch (c) {
349     case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
350     case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
351     case CHR('8'): case CHR('9'):
352     RETV(DIGIT, (chr)DIGITVAL(c));
353     break;
354     case CHR(','):
355     RET(',');
356     break;
357     case CHR('}'): /* ERE bound ends with } */
358     if (INCON(L_EBND)) {
359     INTOCON(L_ERE);
360     if ((v->cflags&REG_ADVF) && NEXT1('?')) {
361     v->now++;
362     NOTE(REG_UNONPOSIX);
363     RETV('}', 0);
364     }
365     RETV('}', 1);
366     } else
367     FAILW(REG_BADBR);
368     break;
369     case CHR('\\'): /* BRE bound ends with \} */
370     if (INCON(L_BBND) && NEXT1('}')) {
371     v->now++;
372     INTOCON(L_BRE);
373     RET('}');
374     } else
375     FAILW(REG_BADBR);
376     break;
377     default:
378     FAILW(REG_BADBR);
379     break;
380     }
381     assert(NOTREACHED);
382     break;
383     case L_BRACK: /* brackets are not too hard */
384     switch (c) {
385     case CHR(']'):
386     if (LASTTYPE('['))
387     RETV(PLAIN, c);
388     else {
389     INTOCON((v->cflags&REG_EXTENDED) ?
390     L_ERE : L_BRE);
391     RET(']');
392     }
393     break;
394     case CHR('\\'):
395     NOTE(REG_UBBS);
396     if (!(v->cflags&REG_ADVF))
397     RETV(PLAIN, c);
398     NOTE(REG_UNONPOSIX);
399     if (ATEOS())
400     FAILW(REG_EESCAPE);
401     (DISCARD)lexescape(v);
402     switch (v->nexttype) { /* not all escapes okay here */
403     case PLAIN:
404     return 1;
405     break;
406     case CCLASS:
407     switch (v->nextvalue) {
408     case 'd':
409     lexnest(v, brbackd, ENDOF(brbackd));
410     break;
411     case 's':
412     lexnest(v, brbacks, ENDOF(brbacks));
413     break;
414     case 'w':
415     lexnest(v, brbackw, ENDOF(brbackw));
416     break;
417     default:
418     FAILW(REG_EESCAPE);
419     break;
420     }
421     /* lexnest done, back up and try again */
422     v->nexttype = v->lasttype;
423     return next(v);
424     break;
425     }
426     /* not one of the acceptable escapes */
427     FAILW(REG_EESCAPE);
428     break;
429     case CHR('-'):
430     if (LASTTYPE('[') || NEXT1(']'))
431     RETV(PLAIN, c);
432     else
433     RETV(RANGE, c);
434     break;
435     case CHR('['):
436     if (ATEOS())
437     FAILW(REG_EBRACK);
438     switch (*v->now++) {
439     case CHR('.'):
440     INTOCON(L_CEL);
441     /* might or might not be locale-specific */
442     RET(COLLEL);
443     break;
444     case CHR('='):
445     INTOCON(L_ECL);
446     NOTE(REG_ULOCALE);
447     RET(ECLASS);
448     break;
449     case CHR(':'):
450     INTOCON(L_CCL);
451     NOTE(REG_ULOCALE);
452     RET(CCLASS);
453     break;
454     default: /* oops */
455     v->now--;
456     RETV(PLAIN, c);
457     break;
458     }
459     assert(NOTREACHED);
460     break;
461     default:
462     RETV(PLAIN, c);
463     break;
464     }
465     assert(NOTREACHED);
466     break;
467     case L_CEL: /* collating elements are easy */
468     if (c == CHR('.') && NEXT1(']')) {
469     v->now++;
470     INTOCON(L_BRACK);
471     RETV(END, '.');
472     } else
473     RETV(PLAIN, c);
474     break;
475     case L_ECL: /* ditto equivalence classes */
476     if (c == CHR('=') && NEXT1(']')) {
477     v->now++;
478     INTOCON(L_BRACK);
479     RETV(END, '=');
480     } else
481     RETV(PLAIN, c);
482     break;
483     case L_CCL: /* ditto character classes */
484     if (c == CHR(':') && NEXT1(']')) {
485     v->now++;
486     INTOCON(L_BRACK);
487     RETV(END, ':');
488     } else
489     RETV(PLAIN, c);
490     break;
491     default:
492     assert(NOTREACHED);
493     break;
494     }
495    
496     /* that got rid of everything except EREs and AREs */
497     assert(INCON(L_ERE));
498    
499     /* deal with EREs and AREs, except for backslashes */
500     switch (c) {
501     case CHR('|'):
502     RET('|');
503     break;
504     case CHR('*'):
505     if ((v->cflags&REG_ADVF) && NEXT1('?')) {
506     v->now++;
507     NOTE(REG_UNONPOSIX);
508     RETV('*', 0);
509     }
510     RETV('*', 1);
511     break;
512     case CHR('+'):
513     if ((v->cflags&REG_ADVF) && NEXT1('?')) {
514     v->now++;
515     NOTE(REG_UNONPOSIX);
516     RETV('+', 0);
517     }
518     RETV('+', 1);
519     break;
520     case CHR('?'):
521     if ((v->cflags&REG_ADVF) && NEXT1('?')) {
522     v->now++;
523     NOTE(REG_UNONPOSIX);
524     RETV('?', 0);
525     }
526     RETV('?', 1);
527     break;
528     case CHR('{'): /* bounds start or plain character */
529     if (v->cflags&REG_EXPANDED)
530     skip(v);
531     if (ATEOS() || !iscdigit(*v->now)) {
532     NOTE(REG_UBRACES);
533     NOTE(REG_UUNSPEC);
534     RETV(PLAIN, c);
535     } else {
536     NOTE(REG_UBOUNDS);
537     INTOCON(L_EBND);
538     RET('{');
539     }
540     assert(NOTREACHED);
541     break;
542     case CHR('('): /* parenthesis, or advanced extension */
543     if ((v->cflags&REG_ADVF) && NEXT1('?')) {
544     NOTE(REG_UNONPOSIX);
545     v->now++;
546     switch (*v->now++) {
547     case CHR(':'): /* non-capturing paren */
548     RETV('(', 0);
549     break;
550     case CHR('#'): /* comment */
551     while (!ATEOS() && *v->now != CHR(')'))
552     v->now++;
553     if (!ATEOS())
554     v->now++;
555     assert(v->nexttype == v->lasttype);
556     return next(v);
557     break;
558     case CHR('='): /* positive lookahead */
559     NOTE(REG_ULOOKAHEAD);
560     RETV(LACON, 1);
561     break;
562     case CHR('!'): /* negative lookahead */
563     NOTE(REG_ULOOKAHEAD);
564     RETV(LACON, 0);
565     break;
566     default:
567     FAILW(REG_BADRPT);
568     break;
569     }
570     assert(NOTREACHED);
571     }
572     if (v->cflags&REG_NOSUB)
573     RETV('(', 0); /* all parens non-capturing */
574     else
575     RETV('(', 1);
576     break;
577     case CHR(')'):
578     if (LASTTYPE('(')) {
579     NOTE(REG_UUNSPEC);
580     }
581     RETV(')', c);
582     break;
583     case CHR('['): /* easy except for [[:<:]] and [[:>:]] */
584     if (HAVE(6) && *(v->now+0) == CHR('[') &&
585     *(v->now+1) == CHR(':') &&
586     (*(v->now+2) == CHR('<') ||
587     *(v->now+2) == CHR('>')) &&
588     *(v->now+3) == CHR(':') &&
589     *(v->now+4) == CHR(']') &&
590     *(v->now+5) == CHR(']')) {
591     c = *(v->now+2);
592     v->now += 6;
593     NOTE(REG_UNONPOSIX);
594     RET((c == CHR('<')) ? '<' : '>');
595     }
596     INTOCON(L_BRACK);
597     if (NEXT1('^')) {
598     v->now++;
599     RETV('[', 0);
600     }
601     RETV('[', 1);
602     break;
603     case CHR('.'):
604     RET('.');
605     break;
606     case CHR('^'):
607     RET('^');
608     break;
609     case CHR('$'):
610     RET('$');
611     break;
612     case CHR('\\'): /* mostly punt backslashes to code below */
613     if (ATEOS())
614     FAILW(REG_EESCAPE);
615     break;
616     default: /* ordinary character */
617     RETV(PLAIN, c);
618     break;
619     }
620    
621     /* ERE/ARE backslash handling; backslash already eaten */
622     assert(!ATEOS());
623     if (!(v->cflags&REG_ADVF)) { /* only AREs have non-trivial escapes */
624     if (iscalnum(*v->now)) {
625     NOTE(REG_UBSALNUM);
626     NOTE(REG_UUNSPEC);
627     }
628     RETV(PLAIN, *v->now++);
629     }
630     (DISCARD)lexescape(v);
631     if (ISERR())
632     FAILW(REG_EESCAPE);
633     if (v->nexttype == CCLASS) { /* fudge at lexical level */
634     switch (v->nextvalue) {
635     case 'd': lexnest(v, backd, ENDOF(backd)); break;
636     case 'D': lexnest(v, backD, ENDOF(backD)); break;
637     case 's': lexnest(v, backs, ENDOF(backs)); break;
638     case 'S': lexnest(v, backS, ENDOF(backS)); break;
639     case 'w': lexnest(v, backw, ENDOF(backw)); break;
640     case 'W': lexnest(v, backW, ENDOF(backW)); break;
641     default:
642     assert(NOTREACHED);
643     FAILW(REG_ASSERT);
644     break;
645     }
646     /* lexnest done, back up and try again */
647     v->nexttype = v->lasttype;
648     return next(v);
649     }
650     /* otherwise, lexescape has already done the work */
651     return !ISERR();
652     }
653    
654     /*
655     - lexescape - parse an ARE backslash escape (backslash already eaten)
656     * Note slightly nonstandard use of the CCLASS type code.
657     ^ static int lexescape(struct vars *);
658     */
659     static int /* not actually used, but convenient for RETV */
660     lexescape(v)
661     struct vars *v;
662     {
663     chr c;
664     static chr alert[] = {
665     CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
666     };
667     static chr esc[] = {
668     CHR('E'), CHR('S'), CHR('C')
669     };
670     chr *save;
671    
672     assert(v->cflags&REG_ADVF);
673    
674     assert(!ATEOS());
675     c = *v->now++;
676     if (!iscalnum(c))
677     RETV(PLAIN, c);
678    
679     NOTE(REG_UNONPOSIX);
680     switch (c) {
681     case CHR('a'):
682     RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
683     break;
684     case CHR('A'):
685     RETV(SBEGIN, 0);
686     break;
687     case CHR('b'):
688     RETV(PLAIN, CHR('\b'));
689     break;
690     case CHR('B'):
691     RETV(PLAIN, CHR('\\'));
692     break;
693     case CHR('c'):
694     NOTE(REG_UUNPORT);
695     if (ATEOS())
696     FAILW(REG_EESCAPE);
697     RETV(PLAIN, (chr)(*v->now++ & 037));
698     break;
699     case CHR('d'):
700     NOTE(REG_ULOCALE);
701     RETV(CCLASS, 'd');
702     break;
703     case CHR('D'):
704     NOTE(REG_ULOCALE);
705     RETV(CCLASS, 'D');
706     break;
707     case CHR('e'):
708     NOTE(REG_UUNPORT);
709     RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
710     break;
711     case CHR('f'):
712     RETV(PLAIN, CHR('\f'));
713     break;
714     case CHR('m'):
715     RET('<');
716     break;
717     case CHR('M'):
718     RET('>');
719     break;
720     case CHR('n'):
721     RETV(PLAIN, CHR('\n'));
722     break;
723     case CHR('r'):
724     RETV(PLAIN, CHR('\r'));
725     break;
726     case CHR('s'):
727     NOTE(REG_ULOCALE);
728     RETV(CCLASS, 's');
729     break;
730     case CHR('S'):
731     NOTE(REG_ULOCALE);
732     RETV(CCLASS, 'S');
733     break;
734     case CHR('t'):
735     RETV(PLAIN, CHR('\t'));
736     break;
737     case CHR('u'):
738     c = lexdigits(v, 16, 4, 4);
739     if (ISERR())
740     FAILW(REG_EESCAPE);
741     RETV(PLAIN, c);
742     break;
743     case CHR('U'):
744     c = lexdigits(v, 16, 8, 8);
745     if (ISERR())
746     FAILW(REG_EESCAPE);
747     RETV(PLAIN, c);
748     break;
749     case CHR('v'):
750     RETV(PLAIN, CHR('\v'));
751     break;
752     case CHR('w'):
753     NOTE(REG_ULOCALE);
754     RETV(CCLASS, 'w');
755     break;
756     case CHR('W'):
757     NOTE(REG_ULOCALE);
758     RETV(CCLASS, 'W');
759     break;
760     case CHR('x'):
761     NOTE(REG_UUNPORT);
762     c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */
763     if (ISERR())
764     FAILW(REG_EESCAPE);
765     RETV(PLAIN, c);
766     break;
767     case CHR('y'):
768     NOTE(REG_ULOCALE);
769     RETV(WBDRY, 0);
770     break;
771     case CHR('Y'):
772     NOTE(REG_ULOCALE);
773     RETV(NWBDRY, 0);
774     break;
775     case CHR('Z'):
776     RETV(SEND, 0);
777     break;
778     case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
779     case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
780     case CHR('9'):
781     save = v->now;
782     v->now--; /* put first digit back */
783     c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */
784     if (ISERR())
785     FAILW(REG_EESCAPE);
786     /* ugly heuristic (first test is "exactly 1 digit?") */
787     if (v->now - save == 0 || (int)c <= v->nsubexp) {
788     NOTE(REG_UBACKREF);
789     RETV(BACKREF, (chr)c);
790     }
791     /* oops, doesn't look like it's a backref after all... */
792     v->now = save;
793     /* and fall through into octal number */
794     case CHR('0'):
795     NOTE(REG_UUNPORT);
796     v->now--; /* put first digit back */
797     c = lexdigits(v, 8, 1, 3);
798     if (ISERR())
799     FAILW(REG_EESCAPE);
800     RETV(PLAIN, c);
801     break;
802     default:
803     assert(iscalpha(c));
804     FAILW(REG_EESCAPE); /* unknown alphabetic escape */
805     break;
806     }
807     assert(NOTREACHED);
808     }
809    
810     /*
811     - lexdigits - slurp up digits and return chr value
812     ^ static chr lexdigits(struct vars *, int, int, int);
813     */
814     static chr /* chr value; errors signalled via ERR */
815     lexdigits(v, base, minlen, maxlen)
816     struct vars *v;
817     int base;
818     int minlen;
819     int maxlen;
820     {
821     uchr n; /* unsigned to avoid overflow misbehavior */
822     int len;
823     chr c;
824     int d;
825     CONST uchr ub = (uchr) base;
826    
827     n = 0;
828     for (len = 0; len < maxlen && !ATEOS(); len++) {
829     c = *v->now++;
830     switch (c) {
831     case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
832     case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
833     case CHR('8'): case CHR('9'):
834     d = DIGITVAL(c);
835     break;
836     case CHR('a'): case CHR('A'): d = 10; break;
837     case CHR('b'): case CHR('B'): d = 11; break;
838     case CHR('c'): case CHR('C'): d = 12; break;
839     case CHR('d'): case CHR('D'): d = 13; break;
840     case CHR('e'): case CHR('E'): d = 14; break;
841     case CHR('f'): case CHR('F'): d = 15; break;
842     default:
843     v->now--; /* oops, not a digit at all */
844     d = -1;
845     break;
846     }
847    
848     if (d >= base) { /* not a plausible digit */
849     v->now--;
850     d = -1;
851     }
852     if (d < 0)
853     break; /* NOTE BREAK OUT */
854     n = n*ub + (uchr)d;
855     }
856     if (len < minlen)
857     ERR(REG_EESCAPE);
858    
859     return (chr)n;
860     }
861    
862     /*
863     - brenext - get next BRE token
864     * This is much like EREs except for all the stupid backslashes and the
865     * context-dependency of some things.
866     ^ static int brenext(struct vars *, pchr);
867     */
868     static int /* 1 normal, 0 failure */
869     brenext(v, pc)
870     struct vars *v;
871     pchr pc;
872     {
873     chr c = (chr)pc;
874    
875     switch (c) {
876     case CHR('*'):
877     if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
878     RETV(PLAIN, c);
879     RET('*');
880     break;
881     case CHR('['):
882     if (HAVE(6) && *(v->now+0) == CHR('[') &&
883     *(v->now+1) == CHR(':') &&
884     (*(v->now+2) == CHR('<') ||
885     *(v->now+2) == CHR('>')) &&
886     *(v->now+3) == CHR(':') &&
887     *(v->now+4) == CHR(']') &&
888     *(v->now+5) == CHR(']')) {
889     c = *(v->now+2);
890     v->now += 6;
891     NOTE(REG_UNONPOSIX);
892     RET((c == CHR('<')) ? '<' : '>');
893     }
894     INTOCON(L_BRACK);
895     if (NEXT1('^')) {
896     v->now++;
897     RETV('[', 0);
898     }
899     RETV('[', 1);
900     break;
901     case CHR('.'):
902     RET('.');
903     break;
904     case CHR('^'):
905     if (LASTTYPE(EMPTY))
906     RET('^');
907     if (LASTTYPE('(')) {
908     NOTE(REG_UUNSPEC);
909     RET('^');
910     }
911     RETV(PLAIN, c);
912     break;
913     case CHR('$'):
914     if (v->cflags&REG_EXPANDED)
915     skip(v);
916     if (ATEOS())
917     RET('$');
918     if (NEXT2('\\', ')')) {
919     NOTE(REG_UUNSPEC);
920     RET('$');
921     }
922     RETV(PLAIN, c);
923     break;
924     case CHR('\\'):
925     break; /* see below */
926     default:
927     RETV(PLAIN, c);
928     break;
929     }
930    
931     assert(c == CHR('\\'));
932    
933     if (ATEOS())
934     FAILW(REG_EESCAPE);
935    
936     c = *v->now++;
937     switch (c) {
938     case CHR('{'):
939     INTOCON(L_BBND);
940     NOTE(REG_UBOUNDS);
941     RET('{');
942     break;
943     case CHR('('):
944     RETV('(', 1);
945     break;
946     case CHR(')'):
947     RETV(')', c);
948     break;
949     case CHR('<'):
950     NOTE(REG_UNONPOSIX);
951     RET('<');
952     break;
953     case CHR('>'):
954     NOTE(REG_UNONPOSIX);
955     RET('>');
956     break;
957     case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
958     case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
959     case CHR('9'):
960     NOTE(REG_UBACKREF);
961     RETV(BACKREF, (chr)DIGITVAL(c));
962     break;
963     default:
964     if (iscalnum(c)) {
965     NOTE(REG_UBSALNUM);
966     NOTE(REG_UUNSPEC);
967     }
968     RETV(PLAIN, c);
969     break;
970     }
971    
972     assert(NOTREACHED);
973     }
974    
975     /*
976     - skip - skip white space and comments in expanded form
977     ^ static VOID skip(struct vars *);
978     */
979     static VOID
980     skip(v)
981     struct vars *v;
982     {
983     chr *start = v->now;
984    
985     assert(v->cflags&REG_EXPANDED);
986    
987     for (;;) {
988     while (!ATEOS() && iscspace(*v->now))
989     v->now++;
990     if (ATEOS() || *v->now != CHR('#'))
991     break; /* NOTE BREAK OUT */
992     assert(NEXT1('#'));
993     while (!ATEOS() && *v->now != CHR('\n'))
994     v->now++;
995     /* leave the newline to be picked up by the iscspace loop */
996     }
997    
998     if (v->now != start)
999     NOTE(REG_UNONPOSIX);
1000     }
1001    
1002     /*
1003     - newline - return the chr for a newline
1004     * This helps confine use of CHR to this source file.
1005     ^ static chr newline(NOPARMS);
1006     */
1007     static chr
1008     newline()
1009     {
1010     return CHR('\n');
1011     }
1012    
/*
 - ch - return the chr sequence for regc_locale.c's fake collating element ch
 * This helps confine use of CHR to this source file.  Beware that the caller
 * knows how long the sequence is.
 * Only compiled when REG_DEBUG is defined.
 ^ #ifdef REG_DEBUG
 ^ static chr *ch(NOPARMS);
 ^ #endif
 */
#ifdef REG_DEBUG
static chr *
ch()
{
	/* 'c', 'h', then a terminating zero chr */
	static chr fakecel[] = { CHR('c'), CHR('h'), CHR('\0') };

	return &fakecel[0];
}
#endif
1030    
1031     /*
1032     - chrnamed - return the chr known by a given (chr string) name
1033     * The code is a bit clumsy, but this routine gets only such specialized
1034     * use that it hardly matters.
1035     ^ static chr chrnamed(struct vars *, chr *, chr *, pchr);
1036     */
1037     static chr
1038     chrnamed(v, startp, endp, lastresort)
1039     struct vars *v;
1040     chr *startp; /* start of name */
1041     chr *endp; /* just past end of name */
1042     pchr lastresort; /* what to return if name lookup fails */
1043     {
1044     celt c;
1045     int errsave;
1046     int e;
1047     struct cvec *cv;
1048    
1049     errsave = v->err;
1050     v->err = 0;
1051     c = element(v, startp, endp);
1052     e = v->err;
1053     v->err = errsave;
1054    
1055     if (e != 0)
1056     return (chr)lastresort;
1057    
1058     cv = range(v, c, c, 0);
1059     if (cv->nchrs == 0)
1060     return (chr)lastresort;
1061     return cv->chrs[0];
1062     }
1063    
1064     /* End of regc_lex.c */

Properties

Name Value
svn:eol-style native
svn:keywords Header

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25