/[dtapublic]/projs/trunk/shared_source/c_tcl_base_7_5_w_mods/regc_lex.c
ViewVC logotype

Annotation of /projs/trunk/shared_source/c_tcl_base_7_5_w_mods/regc_lex.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 44 - (hide annotations) (download)
Fri Oct 14 02:09:58 2016 UTC (8 years ago) by dashley
File MIME type: text/plain
File size: 25927 byte(s)
Rename for reorganization.
1 dashley 25 /* $Header: /cvsroot/esrg/sfesrg/esrgpcpj/shared/tcl_base/regc_lex.c,v 1.1.1.1 2001/06/13 04:31:50 dtashley Exp $ */
2    
3     /*
4     * lexical analyzer
5     * This file is #included by regcomp.c.
6     *
7     * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
8     *
9     * Development of this software was funded, in part, by Cray Research Inc.,
10     * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
11     * Corporation, none of whom are responsible for the results. The author
12     * thanks all of them.
13     *
14     * Redistribution and use in source and binary forms -- with or without
15     * modification -- are permitted for any purpose, provided that
16     * redistributions in source form retain this entire copyright notice and
17     * indicate the origin and nature of any modifications.
18     *
19     * I'd appreciate being given credit for this package in the documentation
20     * of software which uses it, but that is not a requirement.
21     *
22     * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
23     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
24     * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
25     * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26     * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27     * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
28     * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29     * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
30     * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
31     * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32     *
33     */
34    
/*
 * Scanning macros.  All of them assume a "struct vars *v" is in scope.
 * v->now is the read position and v->stop is one past the last chr.
 */
#define	ATEOS()		(v->now >= v->stop)		/* nothing left to read? */
#define	HAVE(n)		((v->stop - v->now) >= (n))	/* at least n chrs left? */
#define	NEXT1(c)	(!ATEOS() && v->now[0] == CHR(c))
#define	NEXT2(a,b)	(HAVE(2) && v->now[0] == CHR(a) && v->now[1] == CHR(b))
#define	NEXT3(a,b,c)	(HAVE(3) && v->now[0] == CHR(a) && \
						v->now[1] == CHR(b) && \
						v->now[2] == CHR(c))

/* record the token type (and optionally its value) */
#define	SET(c)		(v->nexttype = (c))
#define	SETV(c, n)	(v->nexttype = (c), v->nextvalue = (n))

/* record the token and return from the enclosing function */
#define	RET(c)		return (SET(c), 1)
#define	RETV(c, n)	return (SETV(c, n), 1)
#define	FAILW(e)	return (ERR(e), 0)	/* ERR does SET(EOS) */

/* was the previous token of type t? */
#define	LASTTYPE(t)	(v->lasttype == (t))
49    
/*
 * Lexical contexts: the value held in v->lexcon, which selects how next()
 * interprets the upcoming characters.
 */
#define	L_ERE	1	/* mainline ERE/ARE */
#define	L_BRE	2	/* mainline BRE */
#define	L_Q	3	/* REG_QUOTE */
#define	L_EBND	4	/* ERE/ARE bound */
#define	L_BBND	5	/* BRE bound */
#define	L_BRACK	6	/* brackets */
#define	L_CEL	7	/* collating element */
#define	L_ECL	8	/* equivalence class */
#define	L_CCL	9	/* character class */

/* switch to, or test for, a lexical context (assumes v is in scope) */
#define	INTOCON(c)	(v->lexcon = (c))
#define	INCON(con)	(v->lexcon == (con))
62    
/* pointer one past the last element of a true chr array (not a pointer) */
#define	ENDOF(array)	(&(array)[sizeof(array) / sizeof(chr)])
65    
66     /*
67     - lexstart - set up lexical stuff, scan leading options
68     ^ static VOID lexstart(struct vars *);
69     */
70     static VOID
71     lexstart(v)
72     struct vars *v;
73     {
74     prefixes(v); /* may turn on new type bits etc. */
75     NOERR();
76    
77     if (v->cflags&REG_QUOTE) {
78     assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)));
79     INTOCON(L_Q);
80     } else if (v->cflags&REG_EXTENDED) {
81     assert(!(v->cflags&REG_QUOTE));
82     INTOCON(L_ERE);
83     } else {
84     assert(!(v->cflags&(REG_QUOTE|REG_ADVF)));
85     INTOCON(L_BRE);
86     }
87    
88     v->nexttype = EMPTY; /* remember we were at the start */
89     next(v); /* set up the first token */
90     }
91    
92     /*
93     - prefixes - implement various special prefixes
94     ^ static VOID prefixes(struct vars *);
95     */
96     static VOID
97     prefixes(v)
98     struct vars *v;
99     {
100     /* literal string doesn't get any of this stuff */
101     if (v->cflags&REG_QUOTE)
102     return;
103    
104     /* initial "***" gets special things */
105     if (HAVE(4) && NEXT3('*', '*', '*'))
106     switch (*(v->now + 3)) {
107     case CHR('?'): /* "***?" error, msg shows version */
108     ERR(REG_BADPAT);
109     return; /* proceed no further */
110     break;
111     case CHR('='): /* "***=" shifts to literal string */
112     NOTE(REG_UNONPOSIX);
113     v->cflags |= REG_QUOTE;
114     v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE);
115     v->now += 4;
116     return; /* and there can be no more prefixes */
117     break;
118     case CHR(':'): /* "***:" shifts to AREs */
119     NOTE(REG_UNONPOSIX);
120     v->cflags |= REG_ADVANCED;
121     v->now += 4;
122     break;
123     default: /* otherwise *** is just an error */
124     ERR(REG_BADRPT);
125     return;
126     break;
127     }
128    
129     /* BREs and EREs don't get embedded options */
130     if ((v->cflags&REG_ADVANCED) != REG_ADVANCED)
131     return;
132    
133     /* embedded options (AREs only) */
134     if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) {
135     NOTE(REG_UNONPOSIX);
136     v->now += 2;
137     for (; !ATEOS() && iscalpha(*v->now); v->now++)
138     switch (*v->now) {
139     case CHR('b'): /* BREs (but why???) */
140     v->cflags &= ~(REG_ADVANCED|REG_QUOTE);
141     break;
142     case CHR('c'): /* case sensitive */
143     v->cflags &= ~REG_ICASE;
144     break;
145     case CHR('e'): /* plain EREs */
146     v->cflags |= REG_EXTENDED;
147     v->cflags &= ~(REG_ADVF|REG_QUOTE);
148     break;
149     case CHR('i'): /* case insensitive */
150     v->cflags |= REG_ICASE;
151     break;
152     case CHR('m'): /* Perloid synonym for n */
153     case CHR('n'): /* \n affects ^ $ . [^ */
154     v->cflags |= REG_NEWLINE;
155     break;
156     case CHR('p'): /* ~Perl, \n affects . [^ */
157     v->cflags |= REG_NLSTOP;
158     v->cflags &= ~REG_NLANCH;
159     break;
160     case CHR('q'): /* literal string */
161     v->cflags |= REG_QUOTE;
162     v->cflags &= ~REG_ADVANCED;
163     break;
164     case CHR('s'): /* single line, \n ordinary */
165     v->cflags &= ~REG_NEWLINE;
166     break;
167     case CHR('t'): /* tight syntax */
168     v->cflags &= ~REG_EXPANDED;
169     break;
170     case CHR('w'): /* weird, \n affects ^ $ only */
171     v->cflags &= ~REG_NLSTOP;
172     v->cflags |= REG_NLANCH;
173     break;
174     case CHR('x'): /* expanded syntax */
175     v->cflags |= REG_EXPANDED;
176     break;
177     default:
178     ERR(REG_BADOPT);
179     return;
180     }
181     if (!NEXT1(')')) {
182     ERR(REG_BADOPT);
183     return;
184     }
185     v->now++;
186     if (v->cflags&REG_QUOTE)
187     v->cflags &= ~(REG_EXPANDED|REG_NEWLINE);
188     }
189     }
190    
191     /*
192     - lexnest - "call a subroutine", interpolating string at the lexical level
193     * Note, this is not a very general facility. There are a number of
194     * implicit assumptions about what sorts of strings can be subroutines.
195     ^ static VOID lexnest(struct vars *, chr *, chr *);
196     */
197     static VOID
198     lexnest(v, beginp, endp)
199     struct vars *v;
200     chr *beginp; /* start of interpolation */
201     chr *endp; /* one past end of interpolation */
202     {
203     assert(v->savenow == NULL); /* only one level of nesting */
204     v->savenow = v->now;
205     v->savestop = v->stop;
206     v->now = beginp;
207     v->stop = endp;
208     }
209    
210     /*
211     * string constants to interpolate as expansions of things like \d
212     */
213     static chr backd[] = { /* \d */
214     CHR('['), CHR('['), CHR(':'),
215     CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
216     CHR(':'), CHR(']'), CHR(']')
217     };
218     static chr backD[] = { /* \D */
219     CHR('['), CHR('^'), CHR('['), CHR(':'),
220     CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
221     CHR(':'), CHR(']'), CHR(']')
222     };
223     static chr brbackd[] = { /* \d within brackets */
224     CHR('['), CHR(':'),
225     CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
226     CHR(':'), CHR(']')
227     };
228     static chr backs[] = { /* \s */
229     CHR('['), CHR('['), CHR(':'),
230     CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
231     CHR(':'), CHR(']'), CHR(']')
232     };
233     static chr backS[] = { /* \S */
234     CHR('['), CHR('^'), CHR('['), CHR(':'),
235     CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
236     CHR(':'), CHR(']'), CHR(']')
237     };
238     static chr brbacks[] = { /* \s within brackets */
239     CHR('['), CHR(':'),
240     CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
241     CHR(':'), CHR(']')
242     };
243     static chr backw[] = { /* \w */
244     CHR('['), CHR('['), CHR(':'),
245     CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
246     CHR(':'), CHR(']'), CHR('_'), CHR(']')
247     };
248     static chr backW[] = { /* \W */
249     CHR('['), CHR('^'), CHR('['), CHR(':'),
250     CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
251     CHR(':'), CHR(']'), CHR('_'), CHR(']')
252     };
253     static chr brbackw[] = { /* \w within brackets */
254     CHR('['), CHR(':'),
255     CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
256     CHR(':'), CHR(']'), CHR('_')
257     };
258    
259     /*
260     - lexword - interpolate a bracket expression for word characters
261     * Possibly ought to inquire whether there is a "word" character class.
262     ^ static VOID lexword(struct vars *);
263     */
264     static VOID
265     lexword(v)
266     struct vars *v;
267     {
268     lexnest(v, backw, ENDOF(backw));
269     }
270    
271     /*
272     - next - get next token
273     ^ static int next(struct vars *);
274     */
275     static int /* 1 normal, 0 failure */
276     next(v)
277     struct vars *v;
278     {
279     chr c;
280    
281     /* errors yield an infinite sequence of failures */
282     if (ISERR())
283     return 0; /* the error has set nexttype to EOS */
284    
285     /* remember flavor of last token */
286     v->lasttype = v->nexttype;
287    
288     /* REG_BOSONLY */
289     if (v->nexttype == EMPTY && (v->cflags&REG_BOSONLY)) {
290     /* at start of a REG_BOSONLY RE */
291     RETV(SBEGIN, 0); /* same as \A */
292     }
293    
294     /* if we're nested and we've hit end, return to outer level */
295     if (v->savenow != NULL && ATEOS()) {
296     v->now = v->savenow;
297     v->stop = v->savestop;
298     v->savenow = v->savestop = NULL;
299     }
300    
301     /* skip white space etc. if appropriate (not in literal or []) */
302     if (v->cflags&REG_EXPANDED)
303     switch (v->lexcon) {
304     case L_ERE:
305     case L_BRE:
306     case L_EBND:
307     case L_BBND:
308     skip(v);
309     break;
310     }
311    
312     /* handle EOS, depending on context */
313     if (ATEOS()) {
314     switch (v->lexcon) {
315     case L_ERE:
316     case L_BRE:
317     case L_Q:
318     RET(EOS);
319     break;
320     case L_EBND:
321     case L_BBND:
322     FAILW(REG_EBRACE);
323     break;
324     case L_BRACK:
325     case L_CEL:
326     case L_ECL:
327     case L_CCL:
328     FAILW(REG_EBRACK);
329     break;
330     }
331     assert(NOTREACHED);
332     }
333    
334     /* okay, time to actually get a character */
335     c = *v->now++;
336    
337     /* deal with the easy contexts, punt EREs to code below */
338     switch (v->lexcon) {
339     case L_BRE: /* punt BREs to separate function */
340     return brenext(v, c);
341     break;
342     case L_ERE: /* see below */
343     break;
344     case L_Q: /* literal strings are easy */
345     RETV(PLAIN, c);
346     break;
347     case L_BBND: /* bounds are fairly simple */
348     case L_EBND:
349     switch (c) {
350     case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
351     case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
352     case CHR('8'): case CHR('9'):
353     RETV(DIGIT, (chr)DIGITVAL(c));
354     break;
355     case CHR(','):
356     RET(',');
357     break;
358     case CHR('}'): /* ERE bound ends with } */
359     if (INCON(L_EBND)) {
360     INTOCON(L_ERE);
361     if ((v->cflags&REG_ADVF) && NEXT1('?')) {
362     v->now++;
363     NOTE(REG_UNONPOSIX);
364     RETV('}', 0);
365     }
366     RETV('}', 1);
367     } else
368     FAILW(REG_BADBR);
369     break;
370     case CHR('\\'): /* BRE bound ends with \} */
371     if (INCON(L_BBND) && NEXT1('}')) {
372     v->now++;
373     INTOCON(L_BRE);
374     RET('}');
375     } else
376     FAILW(REG_BADBR);
377     break;
378     default:
379     FAILW(REG_BADBR);
380     break;
381     }
382     assert(NOTREACHED);
383     break;
384     case L_BRACK: /* brackets are not too hard */
385     switch (c) {
386     case CHR(']'):
387     if (LASTTYPE('['))
388     RETV(PLAIN, c);
389     else {
390     INTOCON((v->cflags&REG_EXTENDED) ?
391     L_ERE : L_BRE);
392     RET(']');
393     }
394     break;
395     case CHR('\\'):
396     NOTE(REG_UBBS);
397     if (!(v->cflags&REG_ADVF))
398     RETV(PLAIN, c);
399     NOTE(REG_UNONPOSIX);
400     if (ATEOS())
401     FAILW(REG_EESCAPE);
402     (DISCARD)lexescape(v);
403     switch (v->nexttype) { /* not all escapes okay here */
404     case PLAIN:
405     return 1;
406     break;
407     case CCLASS:
408     switch (v->nextvalue) {
409     case 'd':
410     lexnest(v, brbackd, ENDOF(brbackd));
411     break;
412     case 's':
413     lexnest(v, brbacks, ENDOF(brbacks));
414     break;
415     case 'w':
416     lexnest(v, brbackw, ENDOF(brbackw));
417     break;
418     default:
419     FAILW(REG_EESCAPE);
420     break;
421     }
422     /* lexnest done, back up and try again */
423     v->nexttype = v->lasttype;
424     return next(v);
425     break;
426     }
427     /* not one of the acceptable escapes */
428     FAILW(REG_EESCAPE);
429     break;
430     case CHR('-'):
431     if (LASTTYPE('[') || NEXT1(']'))
432     RETV(PLAIN, c);
433     else
434     RETV(RANGE, c);
435     break;
436     case CHR('['):
437     if (ATEOS())
438     FAILW(REG_EBRACK);
439     switch (*v->now++) {
440     case CHR('.'):
441     INTOCON(L_CEL);
442     /* might or might not be locale-specific */
443     RET(COLLEL);
444     break;
445     case CHR('='):
446     INTOCON(L_ECL);
447     NOTE(REG_ULOCALE);
448     RET(ECLASS);
449     break;
450     case CHR(':'):
451     INTOCON(L_CCL);
452     NOTE(REG_ULOCALE);
453     RET(CCLASS);
454     break;
455     default: /* oops */
456     v->now--;
457     RETV(PLAIN, c);
458     break;
459     }
460     assert(NOTREACHED);
461     break;
462     default:
463     RETV(PLAIN, c);
464     break;
465     }
466     assert(NOTREACHED);
467     break;
468     case L_CEL: /* collating elements are easy */
469     if (c == CHR('.') && NEXT1(']')) {
470     v->now++;
471     INTOCON(L_BRACK);
472     RETV(END, '.');
473     } else
474     RETV(PLAIN, c);
475     break;
476     case L_ECL: /* ditto equivalence classes */
477     if (c == CHR('=') && NEXT1(']')) {
478     v->now++;
479     INTOCON(L_BRACK);
480     RETV(END, '=');
481     } else
482     RETV(PLAIN, c);
483     break;
484     case L_CCL: /* ditto character classes */
485     if (c == CHR(':') && NEXT1(']')) {
486     v->now++;
487     INTOCON(L_BRACK);
488     RETV(END, ':');
489     } else
490     RETV(PLAIN, c);
491     break;
492     default:
493     assert(NOTREACHED);
494     break;
495     }
496    
497     /* that got rid of everything except EREs and AREs */
498     assert(INCON(L_ERE));
499    
500     /* deal with EREs and AREs, except for backslashes */
501     switch (c) {
502     case CHR('|'):
503     RET('|');
504     break;
505     case CHR('*'):
506     if ((v->cflags&REG_ADVF) && NEXT1('?')) {
507     v->now++;
508     NOTE(REG_UNONPOSIX);
509     RETV('*', 0);
510     }
511     RETV('*', 1);
512     break;
513     case CHR('+'):
514     if ((v->cflags&REG_ADVF) && NEXT1('?')) {
515     v->now++;
516     NOTE(REG_UNONPOSIX);
517     RETV('+', 0);
518     }
519     RETV('+', 1);
520     break;
521     case CHR('?'):
522     if ((v->cflags&REG_ADVF) && NEXT1('?')) {
523     v->now++;
524     NOTE(REG_UNONPOSIX);
525     RETV('?', 0);
526     }
527     RETV('?', 1);
528     break;
529     case CHR('{'): /* bounds start or plain character */
530     if (v->cflags&REG_EXPANDED)
531     skip(v);
532     if (ATEOS() || !iscdigit(*v->now)) {
533     NOTE(REG_UBRACES);
534     NOTE(REG_UUNSPEC);
535     RETV(PLAIN, c);
536     } else {
537     NOTE(REG_UBOUNDS);
538     INTOCON(L_EBND);
539     RET('{');
540     }
541     assert(NOTREACHED);
542     break;
543     case CHR('('): /* parenthesis, or advanced extension */
544     if ((v->cflags&REG_ADVF) && NEXT1('?')) {
545     NOTE(REG_UNONPOSIX);
546     v->now++;
547     switch (*v->now++) {
548     case CHR(':'): /* non-capturing paren */
549     RETV('(', 0);
550     break;
551     case CHR('#'): /* comment */
552     while (!ATEOS() && *v->now != CHR(')'))
553     v->now++;
554     if (!ATEOS())
555     v->now++;
556     assert(v->nexttype == v->lasttype);
557     return next(v);
558     break;
559     case CHR('='): /* positive lookahead */
560     NOTE(REG_ULOOKAHEAD);
561     RETV(LACON, 1);
562     break;
563     case CHR('!'): /* negative lookahead */
564     NOTE(REG_ULOOKAHEAD);
565     RETV(LACON, 0);
566     break;
567     default:
568     FAILW(REG_BADRPT);
569     break;
570     }
571     assert(NOTREACHED);
572     }
573     if (v->cflags&REG_NOSUB)
574     RETV('(', 0); /* all parens non-capturing */
575     else
576     RETV('(', 1);
577     break;
578     case CHR(')'):
579     if (LASTTYPE('(')) {
580     NOTE(REG_UUNSPEC);
581     }
582     RETV(')', c);
583     break;
584     case CHR('['): /* easy except for [[:<:]] and [[:>:]] */
585     if (HAVE(6) && *(v->now+0) == CHR('[') &&
586     *(v->now+1) == CHR(':') &&
587     (*(v->now+2) == CHR('<') ||
588     *(v->now+2) == CHR('>')) &&
589     *(v->now+3) == CHR(':') &&
590     *(v->now+4) == CHR(']') &&
591     *(v->now+5) == CHR(']')) {
592     c = *(v->now+2);
593     v->now += 6;
594     NOTE(REG_UNONPOSIX);
595     RET((c == CHR('<')) ? '<' : '>');
596     }
597     INTOCON(L_BRACK);
598     if (NEXT1('^')) {
599     v->now++;
600     RETV('[', 0);
601     }
602     RETV('[', 1);
603     break;
604     case CHR('.'):
605     RET('.');
606     break;
607     case CHR('^'):
608     RET('^');
609     break;
610     case CHR('$'):
611     RET('$');
612     break;
613     case CHR('\\'): /* mostly punt backslashes to code below */
614     if (ATEOS())
615     FAILW(REG_EESCAPE);
616     break;
617     default: /* ordinary character */
618     RETV(PLAIN, c);
619     break;
620     }
621    
622     /* ERE/ARE backslash handling; backslash already eaten */
623     assert(!ATEOS());
624     if (!(v->cflags&REG_ADVF)) { /* only AREs have non-trivial escapes */
625     if (iscalnum(*v->now)) {
626     NOTE(REG_UBSALNUM);
627     NOTE(REG_UUNSPEC);
628     }
629     RETV(PLAIN, *v->now++);
630     }
631     (DISCARD)lexescape(v);
632     if (ISERR())
633     FAILW(REG_EESCAPE);
634     if (v->nexttype == CCLASS) { /* fudge at lexical level */
635     switch (v->nextvalue) {
636     case 'd': lexnest(v, backd, ENDOF(backd)); break;
637     case 'D': lexnest(v, backD, ENDOF(backD)); break;
638     case 's': lexnest(v, backs, ENDOF(backs)); break;
639     case 'S': lexnest(v, backS, ENDOF(backS)); break;
640     case 'w': lexnest(v, backw, ENDOF(backw)); break;
641     case 'W': lexnest(v, backW, ENDOF(backW)); break;
642     default:
643     assert(NOTREACHED);
644     FAILW(REG_ASSERT);
645     break;
646     }
647     /* lexnest done, back up and try again */
648     v->nexttype = v->lasttype;
649     return next(v);
650     }
651     /* otherwise, lexescape has already done the work */
652     return !ISERR();
653     }
654    
655     /*
656     - lexescape - parse an ARE backslash escape (backslash already eaten)
657     * Note slightly nonstandard use of the CCLASS type code.
658     ^ static int lexescape(struct vars *);
659     */
660     static int /* not actually used, but convenient for RETV */
661     lexescape(v)
662     struct vars *v;
663     {
664     chr c;
665     static chr alert[] = {
666     CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
667     };
668     static chr esc[] = {
669     CHR('E'), CHR('S'), CHR('C')
670     };
671     chr *save;
672    
673     assert(v->cflags&REG_ADVF);
674    
675     assert(!ATEOS());
676     c = *v->now++;
677     if (!iscalnum(c))
678     RETV(PLAIN, c);
679    
680     NOTE(REG_UNONPOSIX);
681     switch (c) {
682     case CHR('a'):
683     RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
684     break;
685     case CHR('A'):
686     RETV(SBEGIN, 0);
687     break;
688     case CHR('b'):
689     RETV(PLAIN, CHR('\b'));
690     break;
691     case CHR('B'):
692     RETV(PLAIN, CHR('\\'));
693     break;
694     case CHR('c'):
695     NOTE(REG_UUNPORT);
696     if (ATEOS())
697     FAILW(REG_EESCAPE);
698     RETV(PLAIN, (chr)(*v->now++ & 037));
699     break;
700     case CHR('d'):
701     NOTE(REG_ULOCALE);
702     RETV(CCLASS, 'd');
703     break;
704     case CHR('D'):
705     NOTE(REG_ULOCALE);
706     RETV(CCLASS, 'D');
707     break;
708     case CHR('e'):
709     NOTE(REG_UUNPORT);
710     RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
711     break;
712     case CHR('f'):
713     RETV(PLAIN, CHR('\f'));
714     break;
715     case CHR('m'):
716     RET('<');
717     break;
718     case CHR('M'):
719     RET('>');
720     break;
721     case CHR('n'):
722     RETV(PLAIN, CHR('\n'));
723     break;
724     case CHR('r'):
725     RETV(PLAIN, CHR('\r'));
726     break;
727     case CHR('s'):
728     NOTE(REG_ULOCALE);
729     RETV(CCLASS, 's');
730     break;
731     case CHR('S'):
732     NOTE(REG_ULOCALE);
733     RETV(CCLASS, 'S');
734     break;
735     case CHR('t'):
736     RETV(PLAIN, CHR('\t'));
737     break;
738     case CHR('u'):
739     c = lexdigits(v, 16, 4, 4);
740     if (ISERR())
741     FAILW(REG_EESCAPE);
742     RETV(PLAIN, c);
743     break;
744     case CHR('U'):
745     c = lexdigits(v, 16, 8, 8);
746     if (ISERR())
747     FAILW(REG_EESCAPE);
748     RETV(PLAIN, c);
749     break;
750     case CHR('v'):
751     RETV(PLAIN, CHR('\v'));
752     break;
753     case CHR('w'):
754     NOTE(REG_ULOCALE);
755     RETV(CCLASS, 'w');
756     break;
757     case CHR('W'):
758     NOTE(REG_ULOCALE);
759     RETV(CCLASS, 'W');
760     break;
761     case CHR('x'):
762     NOTE(REG_UUNPORT);
763     c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */
764     if (ISERR())
765     FAILW(REG_EESCAPE);
766     RETV(PLAIN, c);
767     break;
768     case CHR('y'):
769     NOTE(REG_ULOCALE);
770     RETV(WBDRY, 0);
771     break;
772     case CHR('Y'):
773     NOTE(REG_ULOCALE);
774     RETV(NWBDRY, 0);
775     break;
776     case CHR('Z'):
777     RETV(SEND, 0);
778     break;
779     case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
780     case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
781     case CHR('9'):
782     save = v->now;
783     v->now--; /* put first digit back */
784     c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */
785     if (ISERR())
786     FAILW(REG_EESCAPE);
787     /* ugly heuristic (first test is "exactly 1 digit?") */
788     if (v->now - save == 0 || (int)c <= v->nsubexp) {
789     NOTE(REG_UBACKREF);
790     RETV(BACKREF, (chr)c);
791     }
792     /* oops, doesn't look like it's a backref after all... */
793     v->now = save;
794     /* and fall through into octal number */
795     case CHR('0'):
796     NOTE(REG_UUNPORT);
797     v->now--; /* put first digit back */
798     c = lexdigits(v, 8, 1, 3);
799     if (ISERR())
800     FAILW(REG_EESCAPE);
801     RETV(PLAIN, c);
802     break;
803     default:
804     assert(iscalpha(c));
805     FAILW(REG_EESCAPE); /* unknown alphabetic escape */
806     break;
807     }
808     assert(NOTREACHED);
809     }
810    
811     /*
812     - lexdigits - slurp up digits and return chr value
813     ^ static chr lexdigits(struct vars *, int, int, int);
814     */
815     static chr /* chr value; errors signalled via ERR */
816     lexdigits(v, base, minlen, maxlen)
817     struct vars *v;
818     int base;
819     int minlen;
820     int maxlen;
821     {
822     uchr n; /* unsigned to avoid overflow misbehavior */
823     int len;
824     chr c;
825     int d;
826     CONST uchr ub = (uchr) base;
827    
828     n = 0;
829     for (len = 0; len < maxlen && !ATEOS(); len++) {
830     c = *v->now++;
831     switch (c) {
832     case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
833     case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
834     case CHR('8'): case CHR('9'):
835     d = DIGITVAL(c);
836     break;
837     case CHR('a'): case CHR('A'): d = 10; break;
838     case CHR('b'): case CHR('B'): d = 11; break;
839     case CHR('c'): case CHR('C'): d = 12; break;
840     case CHR('d'): case CHR('D'): d = 13; break;
841     case CHR('e'): case CHR('E'): d = 14; break;
842     case CHR('f'): case CHR('F'): d = 15; break;
843     default:
844     v->now--; /* oops, not a digit at all */
845     d = -1;
846     break;
847     }
848    
849     if (d >= base) { /* not a plausible digit */
850     v->now--;
851     d = -1;
852     }
853     if (d < 0)
854     break; /* NOTE BREAK OUT */
855     n = n*ub + (uchr)d;
856     }
857     if (len < minlen)
858     ERR(REG_EESCAPE);
859    
860     return (chr)n;
861     }
862    
863     /*
864     - brenext - get next BRE token
865     * This is much like EREs except for all the stupid backslashes and the
866     * context-dependency of some things.
867     ^ static int brenext(struct vars *, pchr);
868     */
869     static int /* 1 normal, 0 failure */
870     brenext(v, pc)
871     struct vars *v;
872     pchr pc;
873     {
874     chr c = (chr)pc;
875    
876     switch (c) {
877     case CHR('*'):
878     if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
879     RETV(PLAIN, c);
880     RET('*');
881     break;
882     case CHR('['):
883     if (HAVE(6) && *(v->now+0) == CHR('[') &&
884     *(v->now+1) == CHR(':') &&
885     (*(v->now+2) == CHR('<') ||
886     *(v->now+2) == CHR('>')) &&
887     *(v->now+3) == CHR(':') &&
888     *(v->now+4) == CHR(']') &&
889     *(v->now+5) == CHR(']')) {
890     c = *(v->now+2);
891     v->now += 6;
892     NOTE(REG_UNONPOSIX);
893     RET((c == CHR('<')) ? '<' : '>');
894     }
895     INTOCON(L_BRACK);
896     if (NEXT1('^')) {
897     v->now++;
898     RETV('[', 0);
899     }
900     RETV('[', 1);
901     break;
902     case CHR('.'):
903     RET('.');
904     break;
905     case CHR('^'):
906     if (LASTTYPE(EMPTY))
907     RET('^');
908     if (LASTTYPE('(')) {
909     NOTE(REG_UUNSPEC);
910     RET('^');
911     }
912     RETV(PLAIN, c);
913     break;
914     case CHR('$'):
915     if (v->cflags&REG_EXPANDED)
916     skip(v);
917     if (ATEOS())
918     RET('$');
919     if (NEXT2('\\', ')')) {
920     NOTE(REG_UUNSPEC);
921     RET('$');
922     }
923     RETV(PLAIN, c);
924     break;
925     case CHR('\\'):
926     break; /* see below */
927     default:
928     RETV(PLAIN, c);
929     break;
930     }
931    
932     assert(c == CHR('\\'));
933    
934     if (ATEOS())
935     FAILW(REG_EESCAPE);
936    
937     c = *v->now++;
938     switch (c) {
939     case CHR('{'):
940     INTOCON(L_BBND);
941     NOTE(REG_UBOUNDS);
942     RET('{');
943     break;
944     case CHR('('):
945     RETV('(', 1);
946     break;
947     case CHR(')'):
948     RETV(')', c);
949     break;
950     case CHR('<'):
951     NOTE(REG_UNONPOSIX);
952     RET('<');
953     break;
954     case CHR('>'):
955     NOTE(REG_UNONPOSIX);
956     RET('>');
957     break;
958     case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
959     case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
960     case CHR('9'):
961     NOTE(REG_UBACKREF);
962     RETV(BACKREF, (chr)DIGITVAL(c));
963     break;
964     default:
965     if (iscalnum(c)) {
966     NOTE(REG_UBSALNUM);
967     NOTE(REG_UUNSPEC);
968     }
969     RETV(PLAIN, c);
970     break;
971     }
972    
973     assert(NOTREACHED);
974     }
975    
976     /*
977     - skip - skip white space and comments in expanded form
978     ^ static VOID skip(struct vars *);
979     */
980     static VOID
981     skip(v)
982     struct vars *v;
983     {
984     chr *start = v->now;
985    
986     assert(v->cflags&REG_EXPANDED);
987    
988     for (;;) {
989     while (!ATEOS() && iscspace(*v->now))
990     v->now++;
991     if (ATEOS() || *v->now != CHR('#'))
992     break; /* NOTE BREAK OUT */
993     assert(NEXT1('#'));
994     while (!ATEOS() && *v->now != CHR('\n'))
995     v->now++;
996     /* leave the newline to be picked up by the iscspace loop */
997     }
998    
999     if (v->now != start)
1000     NOTE(REG_UNONPOSIX);
1001     }
1002    
1003     /*
1004     - newline - return the chr for a newline
1005     * This helps confine use of CHR to this source file.
1006     ^ static chr newline(NOPARMS);
1007     */
1008     static chr
1009     newline()
1010     {
1011     return CHR('\n');
1012     }
1013    
/*
 - ch - return the chr sequence for regc_locale.c's fake collating element ch
 * This helps confine use of CHR to this source file.  Beware that the caller
 * knows how long the sequence is.
 * Only compiled when REG_DEBUG is defined.
 ^ #ifdef REG_DEBUG
 ^ static chr *ch(NOPARMS);
 ^ #endif
 */
#ifdef REG_DEBUG
static chr *
ch()
{
	/* 'c', 'h', plus a terminating NUL chr */
	static chr sequence[] = {
		CHR('c'),
		CHR('h'),
		CHR('\0')
	};

	return sequence;
}
#endif
1031    
1032     /*
1033     - chrnamed - return the chr known by a given (chr string) name
1034     * The code is a bit clumsy, but this routine gets only such specialized
1035     * use that it hardly matters.
1036     ^ static chr chrnamed(struct vars *, chr *, chr *, pchr);
1037     */
1038     static chr
1039     chrnamed(v, startp, endp, lastresort)
1040     struct vars *v;
1041     chr *startp; /* start of name */
1042     chr *endp; /* just past end of name */
1043     pchr lastresort; /* what to return if name lookup fails */
1044     {
1045     celt c;
1046     int errsave;
1047     int e;
1048     struct cvec *cv;
1049    
1050     errsave = v->err;
1051     v->err = 0;
1052     c = element(v, startp, endp);
1053     e = v->err;
1054     v->err = errsave;
1055    
1056     if (e != 0)
1057     return (chr)lastresort;
1058    
1059     cv = range(v, c, c, 0);
1060     if (cv->nchrs == 0)
1061     return (chr)lastresort;
1062     return cv->chrs[0];
1063     }
1064    
1065     /* $History: regc_lex.c $
1066     *
1067     * ***************** Version 1 *****************
1068     * User: Dtashley Date: 1/02/01 Time: 12:05a
1069     * Created in $/IjuScripter, IjuConsole/Source/Tcl Base
1070     * Initial check-in.
1071     */
1072    
1073     /* End of REGC_LEX.C */

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25