/* $Header$ */
/*
 * lexical analyzer
 * This file is #included by regcomp.c.
 *
 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
 *
 * Development of this software was funded, in part, by Cray Research Inc.,
 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
 * Corporation, none of whom are responsible for the results. The author
 * thanks all of them.
 *
 * Redistribution and use in source and binary forms -- with or without
 * modification -- are permitted for any purpose, provided that
 * redistributions in source form retain this entire copyright notice and
 * indicate the origin and nature of any modifications.
 *
 * I'd appreciate being given credit for this package in the documentation
 * of software which uses it, but that is not a requirement.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * scanning macros (know about v)
 *
 * Every macro here expands to code that refers to a variable named v in
 * the enclosing scope; it must point at something with now, stop,
 * nexttype, nextvalue and lasttype members.  RET, RETV and FAILW expand
 * to a complete return statement, so they may only be used in functions
 * returning int.
 */
#define	ATEOS()		(v->now >= v->stop)	/* nothing left to scan? */
#define	HAVE(n)		(v->stop - v->now >= (n))	/* >= n chrs left? */
/* NEXTn: are the next n chrs exactly these?  (bounds-checked) */
#define	NEXT1(c)	(!ATEOS() && *v->now == CHR(c))
#define	NEXT2(a,b)	(HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))
#define	NEXT3(a,b,c)	(HAVE(3) && *v->now == CHR(a) && \
				*(v->now+1) == CHR(b) && \
				*(v->now+2) == CHR(c))
#define	SET(c)		(v->nexttype = (c))	/* set token type only */
#define	SETV(c, n)	(v->nexttype = (c), v->nextvalue = (n))	/* type+value */
#define	RET(c)		return (SET(c), 1)	/* set type, report success */
#define	RETV(c, n)	return (SETV(c, n), 1)	/* set type+value, succeed */
#define	FAILW(e)	return (ERR(e), 0)	/* ERR does SET(EOS) */
#define	LASTTYPE(t)	(v->lasttype == (t))	/* was previous token a t? */

/*
 * lexical contexts
 *
 * v->lexcon holds exactly one of these values; it selects which set of
 * tokenizing rules next() applies to the upcoming chr.
 */
#define	L_ERE	1	/* mainline ERE/ARE */
#define	L_BRE	2	/* mainline BRE */
#define	L_Q	3	/* REG_QUOTE */
#define	L_EBND	4	/* ERE/ARE bound */
#define	L_BBND	5	/* BRE bound */
#define	L_BRACK	6	/* brackets */
#define	L_CEL	7	/* collating element */
#define	L_ECL	8	/* equivalence class */
#define	L_CCL	9	/* character class */
#define	INTOCON(c)	(v->lexcon = (c))	/* switch to context c */
#define	INCON(con)	(v->lexcon == (con))	/* currently in context con? */

/*
 * construct pointer past end of chr array
 *
 * The argument must be an actual array of chr (not a pointer), since the
 * element count is computed with sizeof.
 */
#define	ENDOF(array)	((array) + sizeof(array)/sizeof(chr))

/* |
66 |
|
|
- lexstart - set up lexical stuff, scan leading options |
67 |
|
|
^ static VOID lexstart(struct vars *); |
68 |
|
|
*/ |
69 |
|
|
static VOID |
70 |
|
|
lexstart(v) |
71 |
|
|
struct vars *v; |
72 |
|
|
{ |
73 |
|
|
prefixes(v); /* may turn on new type bits etc. */ |
74 |
|
|
NOERR(); |
75 |
|
|
|
76 |
|
|
if (v->cflags®_QUOTE) { |
77 |
|
|
assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))); |
78 |
|
|
INTOCON(L_Q); |
79 |
|
|
} else if (v->cflags®_EXTENDED) { |
80 |
|
|
assert(!(v->cflags®_QUOTE)); |
81 |
|
|
INTOCON(L_ERE); |
82 |
|
|
} else { |
83 |
|
|
assert(!(v->cflags&(REG_QUOTE|REG_ADVF))); |
84 |
|
|
INTOCON(L_BRE); |
85 |
|
|
} |
86 |
|
|
|
87 |
|
|
v->nexttype = EMPTY; /* remember we were at the start */ |
88 |
|
|
next(v); /* set up the first token */ |
89 |
|
|
} |
90 |
|
|
|
91 |
|
|
/* |
92 |
|
|
- prefixes - implement various special prefixes |
93 |
|
|
^ static VOID prefixes(struct vars *); |
94 |
|
|
*/ |
95 |
|
|
static VOID |
96 |
|
|
prefixes(v) |
97 |
|
|
struct vars *v; |
98 |
|
|
{ |
99 |
|
|
/* literal string doesn't get any of this stuff */ |
100 |
|
|
if (v->cflags®_QUOTE) |
101 |
|
|
return; |
102 |
|
|
|
103 |
|
|
/* initial "***" gets special things */ |
104 |
|
|
if (HAVE(4) && NEXT3('*', '*', '*')) |
105 |
|
|
switch (*(v->now + 3)) { |
106 |
|
|
case CHR('?'): /* "***?" error, msg shows version */ |
107 |
|
|
ERR(REG_BADPAT); |
108 |
|
|
return; /* proceed no further */ |
109 |
|
|
break; |
110 |
|
|
case CHR('='): /* "***=" shifts to literal string */ |
111 |
|
|
NOTE(REG_UNONPOSIX); |
112 |
|
|
v->cflags |= REG_QUOTE; |
113 |
|
|
v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE); |
114 |
|
|
v->now += 4; |
115 |
|
|
return; /* and there can be no more prefixes */ |
116 |
|
|
break; |
117 |
|
|
case CHR(':'): /* "***:" shifts to AREs */ |
118 |
|
|
NOTE(REG_UNONPOSIX); |
119 |
|
|
v->cflags |= REG_ADVANCED; |
120 |
|
|
v->now += 4; |
121 |
|
|
break; |
122 |
|
|
default: /* otherwise *** is just an error */ |
123 |
|
|
ERR(REG_BADRPT); |
124 |
|
|
return; |
125 |
|
|
break; |
126 |
|
|
} |
127 |
|
|
|
128 |
|
|
/* BREs and EREs don't get embedded options */ |
129 |
|
|
if ((v->cflags®_ADVANCED) != REG_ADVANCED) |
130 |
|
|
return; |
131 |
|
|
|
132 |
|
|
/* embedded options (AREs only) */ |
133 |
|
|
if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) { |
134 |
|
|
NOTE(REG_UNONPOSIX); |
135 |
|
|
v->now += 2; |
136 |
|
|
for (; !ATEOS() && iscalpha(*v->now); v->now++) |
137 |
|
|
switch (*v->now) { |
138 |
|
|
case CHR('b'): /* BREs (but why???) */ |
139 |
|
|
v->cflags &= ~(REG_ADVANCED|REG_QUOTE); |
140 |
|
|
break; |
141 |
|
|
case CHR('c'): /* case sensitive */ |
142 |
|
|
v->cflags &= ~REG_ICASE; |
143 |
|
|
break; |
144 |
|
|
case CHR('e'): /* plain EREs */ |
145 |
|
|
v->cflags |= REG_EXTENDED; |
146 |
|
|
v->cflags &= ~(REG_ADVF|REG_QUOTE); |
147 |
|
|
break; |
148 |
|
|
case CHR('i'): /* case insensitive */ |
149 |
|
|
v->cflags |= REG_ICASE; |
150 |
|
|
break; |
151 |
|
|
case CHR('m'): /* Perloid synonym for n */ |
152 |
|
|
case CHR('n'): /* \n affects ^ $ . [^ */ |
153 |
|
|
v->cflags |= REG_NEWLINE; |
154 |
|
|
break; |
155 |
|
|
case CHR('p'): /* ~Perl, \n affects . [^ */ |
156 |
|
|
v->cflags |= REG_NLSTOP; |
157 |
|
|
v->cflags &= ~REG_NLANCH; |
158 |
|
|
break; |
159 |
|
|
case CHR('q'): /* literal string */ |
160 |
|
|
v->cflags |= REG_QUOTE; |
161 |
|
|
v->cflags &= ~REG_ADVANCED; |
162 |
|
|
break; |
163 |
|
|
case CHR('s'): /* single line, \n ordinary */ |
164 |
|
|
v->cflags &= ~REG_NEWLINE; |
165 |
|
|
break; |
166 |
|
|
case CHR('t'): /* tight syntax */ |
167 |
|
|
v->cflags &= ~REG_EXPANDED; |
168 |
|
|
break; |
169 |
|
|
case CHR('w'): /* weird, \n affects ^ $ only */ |
170 |
|
|
v->cflags &= ~REG_NLSTOP; |
171 |
|
|
v->cflags |= REG_NLANCH; |
172 |
|
|
break; |
173 |
|
|
case CHR('x'): /* expanded syntax */ |
174 |
|
|
v->cflags |= REG_EXPANDED; |
175 |
|
|
break; |
176 |
|
|
default: |
177 |
|
|
ERR(REG_BADOPT); |
178 |
|
|
return; |
179 |
|
|
} |
180 |
|
|
if (!NEXT1(')')) { |
181 |
|
|
ERR(REG_BADOPT); |
182 |
|
|
return; |
183 |
|
|
} |
184 |
|
|
v->now++; |
185 |
|
|
if (v->cflags®_QUOTE) |
186 |
|
|
v->cflags &= ~(REG_EXPANDED|REG_NEWLINE); |
187 |
|
|
} |
188 |
|
|
} |
189 |
|
|
|
190 |
|
|
/* |
191 |
|
|
- lexnest - "call a subroutine", interpolating string at the lexical level |
192 |
|
|
* Note, this is not a very general facility. There are a number of |
193 |
|
|
* implicit assumptions about what sorts of strings can be subroutines. |
194 |
|
|
^ static VOID lexnest(struct vars *, chr *, chr *); |
195 |
|
|
*/ |
196 |
|
|
static VOID |
197 |
|
|
lexnest(v, beginp, endp) |
198 |
|
|
struct vars *v; |
199 |
|
|
chr *beginp; /* start of interpolation */ |
200 |
|
|
chr *endp; /* one past end of interpolation */ |
201 |
|
|
{ |
202 |
|
|
assert(v->savenow == NULL); /* only one level of nesting */ |
203 |
|
|
v->savenow = v->now; |
204 |
|
|
v->savestop = v->stop; |
205 |
|
|
v->now = beginp; |
206 |
|
|
v->stop = endp; |
207 |
|
|
} |
208 |
|
|
|
209 |
|
|
/* |
210 |
|
|
* string constants to interpolate as expansions of things like \d |
211 |
|
|
*/ |
212 |
|
|
static chr backd[] = { /* \d */ |
213 |
|
|
CHR('['), CHR('['), CHR(':'), |
214 |
|
|
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), |
215 |
|
|
CHR(':'), CHR(']'), CHR(']') |
216 |
|
|
}; |
217 |
|
|
static chr backD[] = { /* \D */ |
218 |
|
|
CHR('['), CHR('^'), CHR('['), CHR(':'), |
219 |
|
|
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), |
220 |
|
|
CHR(':'), CHR(']'), CHR(']') |
221 |
|
|
}; |
222 |
|
|
static chr brbackd[] = { /* \d within brackets */ |
223 |
|
|
CHR('['), CHR(':'), |
224 |
|
|
CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), |
225 |
|
|
CHR(':'), CHR(']') |
226 |
|
|
}; |
227 |
|
|
static chr backs[] = { /* \s */ |
228 |
|
|
CHR('['), CHR('['), CHR(':'), |
229 |
|
|
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), |
230 |
|
|
CHR(':'), CHR(']'), CHR(']') |
231 |
|
|
}; |
232 |
|
|
static chr backS[] = { /* \S */ |
233 |
|
|
CHR('['), CHR('^'), CHR('['), CHR(':'), |
234 |
|
|
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), |
235 |
|
|
CHR(':'), CHR(']'), CHR(']') |
236 |
|
|
}; |
237 |
|
|
static chr brbacks[] = { /* \s within brackets */ |
238 |
|
|
CHR('['), CHR(':'), |
239 |
|
|
CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), |
240 |
|
|
CHR(':'), CHR(']') |
241 |
|
|
}; |
242 |
|
|
static chr backw[] = { /* \w */ |
243 |
|
|
CHR('['), CHR('['), CHR(':'), |
244 |
|
|
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), |
245 |
|
|
CHR(':'), CHR(']'), CHR('_'), CHR(']') |
246 |
|
|
}; |
247 |
|
|
static chr backW[] = { /* \W */ |
248 |
|
|
CHR('['), CHR('^'), CHR('['), CHR(':'), |
249 |
|
|
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), |
250 |
|
|
CHR(':'), CHR(']'), CHR('_'), CHR(']') |
251 |
|
|
}; |
252 |
|
|
static chr brbackw[] = { /* \w within brackets */ |
253 |
|
|
CHR('['), CHR(':'), |
254 |
|
|
CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), |
255 |
|
|
CHR(':'), CHR(']'), CHR('_') |
256 |
|
|
}; |
257 |
|
|
|
258 |
|
|
/* |
259 |
|
|
- lexword - interpolate a bracket expression for word characters |
260 |
|
|
* Possibly ought to inquire whether there is a "word" character class. |
261 |
|
|
^ static VOID lexword(struct vars *); |
262 |
|
|
*/ |
263 |
|
|
static VOID |
264 |
|
|
lexword(v) |
265 |
|
|
struct vars *v; |
266 |
|
|
{ |
267 |
|
|
lexnest(v, backw, ENDOF(backw)); |
268 |
|
|
} |
269 |
|
|
|
270 |
|
|
/* |
271 |
|
|
- next - get next token |
272 |
|
|
^ static int next(struct vars *); |
273 |
|
|
*/ |
274 |
|
|
static int /* 1 normal, 0 failure */ |
275 |
|
|
next(v) |
276 |
|
|
struct vars *v; |
277 |
|
|
{ |
278 |
|
|
chr c; |
279 |
|
|
|
280 |
|
|
/* errors yield an infinite sequence of failures */ |
281 |
|
|
if (ISERR()) |
282 |
|
|
return 0; /* the error has set nexttype to EOS */ |
283 |
|
|
|
284 |
|
|
/* remember flavor of last token */ |
285 |
|
|
v->lasttype = v->nexttype; |
286 |
|
|
|
287 |
|
|
/* REG_BOSONLY */ |
288 |
|
|
if (v->nexttype == EMPTY && (v->cflags®_BOSONLY)) { |
289 |
|
|
/* at start of a REG_BOSONLY RE */ |
290 |
|
|
RETV(SBEGIN, 0); /* same as \A */ |
291 |
|
|
} |
292 |
|
|
|
293 |
|
|
/* if we're nested and we've hit end, return to outer level */ |
294 |
|
|
if (v->savenow != NULL && ATEOS()) { |
295 |
|
|
v->now = v->savenow; |
296 |
|
|
v->stop = v->savestop; |
297 |
|
|
v->savenow = v->savestop = NULL; |
298 |
|
|
} |
299 |
|
|
|
300 |
|
|
/* skip white space etc. if appropriate (not in literal or []) */ |
301 |
|
|
if (v->cflags®_EXPANDED) |
302 |
|
|
switch (v->lexcon) { |
303 |
|
|
case L_ERE: |
304 |
|
|
case L_BRE: |
305 |
|
|
case L_EBND: |
306 |
|
|
case L_BBND: |
307 |
|
|
skip(v); |
308 |
|
|
break; |
309 |
|
|
} |
310 |
|
|
|
311 |
|
|
/* handle EOS, depending on context */ |
312 |
|
|
if (ATEOS()) { |
313 |
|
|
switch (v->lexcon) { |
314 |
|
|
case L_ERE: |
315 |
|
|
case L_BRE: |
316 |
|
|
case L_Q: |
317 |
|
|
RET(EOS); |
318 |
|
|
break; |
319 |
|
|
case L_EBND: |
320 |
|
|
case L_BBND: |
321 |
|
|
FAILW(REG_EBRACE); |
322 |
|
|
break; |
323 |
|
|
case L_BRACK: |
324 |
|
|
case L_CEL: |
325 |
|
|
case L_ECL: |
326 |
|
|
case L_CCL: |
327 |
|
|
FAILW(REG_EBRACK); |
328 |
|
|
break; |
329 |
|
|
} |
330 |
|
|
assert(NOTREACHED); |
331 |
|
|
} |
332 |
|
|
|
333 |
|
|
/* okay, time to actually get a character */ |
334 |
|
|
c = *v->now++; |
335 |
|
|
|
336 |
|
|
/* deal with the easy contexts, punt EREs to code below */ |
337 |
|
|
switch (v->lexcon) { |
338 |
|
|
case L_BRE: /* punt BREs to separate function */ |
339 |
|
|
return brenext(v, c); |
340 |
|
|
break; |
341 |
|
|
case L_ERE: /* see below */ |
342 |
|
|
break; |
343 |
|
|
case L_Q: /* literal strings are easy */ |
344 |
|
|
RETV(PLAIN, c); |
345 |
|
|
break; |
346 |
|
|
case L_BBND: /* bounds are fairly simple */ |
347 |
|
|
case L_EBND: |
348 |
|
|
switch (c) { |
349 |
|
|
case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'): |
350 |
|
|
case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): |
351 |
|
|
case CHR('8'): case CHR('9'): |
352 |
|
|
RETV(DIGIT, (chr)DIGITVAL(c)); |
353 |
|
|
break; |
354 |
|
|
case CHR(','): |
355 |
|
|
RET(','); |
356 |
|
|
break; |
357 |
|
|
case CHR('}'): /* ERE bound ends with } */ |
358 |
|
|
if (INCON(L_EBND)) { |
359 |
|
|
INTOCON(L_ERE); |
360 |
|
|
if ((v->cflags®_ADVF) && NEXT1('?')) { |
361 |
|
|
v->now++; |
362 |
|
|
NOTE(REG_UNONPOSIX); |
363 |
|
|
RETV('}', 0); |
364 |
|
|
} |
365 |
|
|
RETV('}', 1); |
366 |
|
|
} else |
367 |
|
|
FAILW(REG_BADBR); |
368 |
|
|
break; |
369 |
|
|
case CHR('\\'): /* BRE bound ends with \} */ |
370 |
|
|
if (INCON(L_BBND) && NEXT1('}')) { |
371 |
|
|
v->now++; |
372 |
|
|
INTOCON(L_BRE); |
373 |
|
|
RET('}'); |
374 |
|
|
} else |
375 |
|
|
FAILW(REG_BADBR); |
376 |
|
|
break; |
377 |
|
|
default: |
378 |
|
|
FAILW(REG_BADBR); |
379 |
|
|
break; |
380 |
|
|
} |
381 |
|
|
assert(NOTREACHED); |
382 |
|
|
break; |
383 |
|
|
case L_BRACK: /* brackets are not too hard */ |
384 |
|
|
switch (c) { |
385 |
|
|
case CHR(']'): |
386 |
|
|
if (LASTTYPE('[')) |
387 |
|
|
RETV(PLAIN, c); |
388 |
|
|
else { |
389 |
|
|
INTOCON((v->cflags®_EXTENDED) ? |
390 |
|
|
L_ERE : L_BRE); |
391 |
|
|
RET(']'); |
392 |
|
|
} |
393 |
|
|
break; |
394 |
|
|
case CHR('\\'): |
395 |
|
|
NOTE(REG_UBBS); |
396 |
|
|
if (!(v->cflags®_ADVF)) |
397 |
|
|
RETV(PLAIN, c); |
398 |
|
|
NOTE(REG_UNONPOSIX); |
399 |
|
|
if (ATEOS()) |
400 |
|
|
FAILW(REG_EESCAPE); |
401 |
|
|
(DISCARD)lexescape(v); |
402 |
|
|
switch (v->nexttype) { /* not all escapes okay here */ |
403 |
|
|
case PLAIN: |
404 |
|
|
return 1; |
405 |
|
|
break; |
406 |
|
|
case CCLASS: |
407 |
|
|
switch (v->nextvalue) { |
408 |
|
|
case 'd': |
409 |
|
|
lexnest(v, brbackd, ENDOF(brbackd)); |
410 |
|
|
break; |
411 |
|
|
case 's': |
412 |
|
|
lexnest(v, brbacks, ENDOF(brbacks)); |
413 |
|
|
break; |
414 |
|
|
case 'w': |
415 |
|
|
lexnest(v, brbackw, ENDOF(brbackw)); |
416 |
|
|
break; |
417 |
|
|
default: |
418 |
|
|
FAILW(REG_EESCAPE); |
419 |
|
|
break; |
420 |
|
|
} |
421 |
|
|
/* lexnest done, back up and try again */ |
422 |
|
|
v->nexttype = v->lasttype; |
423 |
|
|
return next(v); |
424 |
|
|
break; |
425 |
|
|
} |
426 |
|
|
/* not one of the acceptable escapes */ |
427 |
|
|
FAILW(REG_EESCAPE); |
428 |
|
|
break; |
429 |
|
|
case CHR('-'): |
430 |
|
|
if (LASTTYPE('[') || NEXT1(']')) |
431 |
|
|
RETV(PLAIN, c); |
432 |
|
|
else |
433 |
|
|
RETV(RANGE, c); |
434 |
|
|
break; |
435 |
|
|
case CHR('['): |
436 |
|
|
if (ATEOS()) |
437 |
|
|
FAILW(REG_EBRACK); |
438 |
|
|
switch (*v->now++) { |
439 |
|
|
case CHR('.'): |
440 |
|
|
INTOCON(L_CEL); |
441 |
|
|
/* might or might not be locale-specific */ |
442 |
|
|
RET(COLLEL); |
443 |
|
|
break; |
444 |
|
|
case CHR('='): |
445 |
|
|
INTOCON(L_ECL); |
446 |
|
|
NOTE(REG_ULOCALE); |
447 |
|
|
RET(ECLASS); |
448 |
|
|
break; |
449 |
|
|
case CHR(':'): |
450 |
|
|
INTOCON(L_CCL); |
451 |
|
|
NOTE(REG_ULOCALE); |
452 |
|
|
RET(CCLASS); |
453 |
|
|
break; |
454 |
|
|
default: /* oops */ |
455 |
|
|
v->now--; |
456 |
|
|
RETV(PLAIN, c); |
457 |
|
|
break; |
458 |
|
|
} |
459 |
|
|
assert(NOTREACHED); |
460 |
|
|
break; |
461 |
|
|
default: |
462 |
|
|
RETV(PLAIN, c); |
463 |
|
|
break; |
464 |
|
|
} |
465 |
|
|
assert(NOTREACHED); |
466 |
|
|
break; |
467 |
|
|
case L_CEL: /* collating elements are easy */ |
468 |
|
|
if (c == CHR('.') && NEXT1(']')) { |
469 |
|
|
v->now++; |
470 |
|
|
INTOCON(L_BRACK); |
471 |
|
|
RETV(END, '.'); |
472 |
|
|
} else |
473 |
|
|
RETV(PLAIN, c); |
474 |
|
|
break; |
475 |
|
|
case L_ECL: /* ditto equivalence classes */ |
476 |
|
|
if (c == CHR('=') && NEXT1(']')) { |
477 |
|
|
v->now++; |
478 |
|
|
INTOCON(L_BRACK); |
479 |
|
|
RETV(END, '='); |
480 |
|
|
} else |
481 |
|
|
RETV(PLAIN, c); |
482 |
|
|
break; |
483 |
|
|
case L_CCL: /* ditto character classes */ |
484 |
|
|
if (c == CHR(':') && NEXT1(']')) { |
485 |
|
|
v->now++; |
486 |
|
|
INTOCON(L_BRACK); |
487 |
|
|
RETV(END, ':'); |
488 |
|
|
} else |
489 |
|
|
RETV(PLAIN, c); |
490 |
|
|
break; |
491 |
|
|
default: |
492 |
|
|
assert(NOTREACHED); |
493 |
|
|
break; |
494 |
|
|
} |
495 |
|
|
|
496 |
|
|
/* that got rid of everything except EREs and AREs */ |
497 |
|
|
assert(INCON(L_ERE)); |
498 |
|
|
|
499 |
|
|
/* deal with EREs and AREs, except for backslashes */ |
500 |
|
|
switch (c) { |
501 |
|
|
case CHR('|'): |
502 |
|
|
RET('|'); |
503 |
|
|
break; |
504 |
|
|
case CHR('*'): |
505 |
|
|
if ((v->cflags®_ADVF) && NEXT1('?')) { |
506 |
|
|
v->now++; |
507 |
|
|
NOTE(REG_UNONPOSIX); |
508 |
|
|
RETV('*', 0); |
509 |
|
|
} |
510 |
|
|
RETV('*', 1); |
511 |
|
|
break; |
512 |
|
|
case CHR('+'): |
513 |
|
|
if ((v->cflags®_ADVF) && NEXT1('?')) { |
514 |
|
|
v->now++; |
515 |
|
|
NOTE(REG_UNONPOSIX); |
516 |
|
|
RETV('+', 0); |
517 |
|
|
} |
518 |
|
|
RETV('+', 1); |
519 |
|
|
break; |
520 |
|
|
case CHR('?'): |
521 |
|
|
if ((v->cflags®_ADVF) && NEXT1('?')) { |
522 |
|
|
v->now++; |
523 |
|
|
NOTE(REG_UNONPOSIX); |
524 |
|
|
RETV('?', 0); |
525 |
|
|
} |
526 |
|
|
RETV('?', 1); |
527 |
|
|
break; |
528 |
|
|
case CHR('{'): /* bounds start or plain character */ |
529 |
|
|
if (v->cflags®_EXPANDED) |
530 |
|
|
skip(v); |
531 |
|
|
if (ATEOS() || !iscdigit(*v->now)) { |
532 |
|
|
NOTE(REG_UBRACES); |
533 |
|
|
NOTE(REG_UUNSPEC); |
534 |
|
|
RETV(PLAIN, c); |
535 |
|
|
} else { |
536 |
|
|
NOTE(REG_UBOUNDS); |
537 |
|
|
INTOCON(L_EBND); |
538 |
|
|
RET('{'); |
539 |
|
|
} |
540 |
|
|
assert(NOTREACHED); |
541 |
|
|
break; |
542 |
|
|
case CHR('('): /* parenthesis, or advanced extension */ |
543 |
|
|
if ((v->cflags®_ADVF) && NEXT1('?')) { |
544 |
|
|
NOTE(REG_UNONPOSIX); |
545 |
|
|
v->now++; |
546 |
|
|
switch (*v->now++) { |
547 |
|
|
case CHR(':'): /* non-capturing paren */ |
548 |
|
|
RETV('(', 0); |
549 |
|
|
break; |
550 |
|
|
case CHR('#'): /* comment */ |
551 |
|
|
while (!ATEOS() && *v->now != CHR(')')) |
552 |
|
|
v->now++; |
553 |
|
|
if (!ATEOS()) |
554 |
|
|
v->now++; |
555 |
|
|
assert(v->nexttype == v->lasttype); |
556 |
|
|
return next(v); |
557 |
|
|
break; |
558 |
|
|
case CHR('='): /* positive lookahead */ |
559 |
|
|
NOTE(REG_ULOOKAHEAD); |
560 |
|
|
RETV(LACON, 1); |
561 |
|
|
break; |
562 |
|
|
case CHR('!'): /* negative lookahead */ |
563 |
|
|
NOTE(REG_ULOOKAHEAD); |
564 |
|
|
RETV(LACON, 0); |
565 |
|
|
break; |
566 |
|
|
default: |
567 |
|
|
FAILW(REG_BADRPT); |
568 |
|
|
break; |
569 |
|
|
} |
570 |
|
|
assert(NOTREACHED); |
571 |
|
|
} |
572 |
|
|
if (v->cflags®_NOSUB) |
573 |
|
|
RETV('(', 0); /* all parens non-capturing */ |
574 |
|
|
else |
575 |
|
|
RETV('(', 1); |
576 |
|
|
break; |
577 |
|
|
case CHR(')'): |
578 |
|
|
if (LASTTYPE('(')) { |
579 |
|
|
NOTE(REG_UUNSPEC); |
580 |
|
|
} |
581 |
|
|
RETV(')', c); |
582 |
|
|
break; |
583 |
|
|
case CHR('['): /* easy except for [[:<:]] and [[:>:]] */ |
584 |
|
|
if (HAVE(6) && *(v->now+0) == CHR('[') && |
585 |
|
|
*(v->now+1) == CHR(':') && |
586 |
|
|
(*(v->now+2) == CHR('<') || |
587 |
|
|
*(v->now+2) == CHR('>')) && |
588 |
|
|
*(v->now+3) == CHR(':') && |
589 |
|
|
*(v->now+4) == CHR(']') && |
590 |
|
|
*(v->now+5) == CHR(']')) { |
591 |
|
|
c = *(v->now+2); |
592 |
|
|
v->now += 6; |
593 |
|
|
NOTE(REG_UNONPOSIX); |
594 |
|
|
RET((c == CHR('<')) ? '<' : '>'); |
595 |
|
|
} |
596 |
|
|
INTOCON(L_BRACK); |
597 |
|
|
if (NEXT1('^')) { |
598 |
|
|
v->now++; |
599 |
|
|
RETV('[', 0); |
600 |
|
|
} |
601 |
|
|
RETV('[', 1); |
602 |
|
|
break; |
603 |
|
|
case CHR('.'): |
604 |
|
|
RET('.'); |
605 |
|
|
break; |
606 |
|
|
case CHR('^'): |
607 |
|
|
RET('^'); |
608 |
|
|
break; |
609 |
|
|
case CHR('$'): |
610 |
|
|
RET('$'); |
611 |
|
|
break; |
612 |
|
|
case CHR('\\'): /* mostly punt backslashes to code below */ |
613 |
|
|
if (ATEOS()) |
614 |
|
|
FAILW(REG_EESCAPE); |
615 |
|
|
break; |
616 |
|
|
default: /* ordinary character */ |
617 |
|
|
RETV(PLAIN, c); |
618 |
|
|
break; |
619 |
|
|
} |
620 |
|
|
|
621 |
|
|
/* ERE/ARE backslash handling; backslash already eaten */ |
622 |
|
|
assert(!ATEOS()); |
623 |
|
|
if (!(v->cflags®_ADVF)) { /* only AREs have non-trivial escapes */ |
624 |
|
|
if (iscalnum(*v->now)) { |
625 |
|
|
NOTE(REG_UBSALNUM); |
626 |
|
|
NOTE(REG_UUNSPEC); |
627 |
|
|
} |
628 |
|
|
RETV(PLAIN, *v->now++); |
629 |
|
|
} |
630 |
|
|
(DISCARD)lexescape(v); |
631 |
|
|
if (ISERR()) |
632 |
|
|
FAILW(REG_EESCAPE); |
633 |
|
|
if (v->nexttype == CCLASS) { /* fudge at lexical level */ |
634 |
|
|
switch (v->nextvalue) { |
635 |
|
|
case 'd': lexnest(v, backd, ENDOF(backd)); break; |
636 |
|
|
case 'D': lexnest(v, backD, ENDOF(backD)); break; |
637 |
|
|
case 's': lexnest(v, backs, ENDOF(backs)); break; |
638 |
|
|
case 'S': lexnest(v, backS, ENDOF(backS)); break; |
639 |
|
|
case 'w': lexnest(v, backw, ENDOF(backw)); break; |
640 |
|
|
case 'W': lexnest(v, backW, ENDOF(backW)); break; |
641 |
|
|
default: |
642 |
|
|
assert(NOTREACHED); |
643 |
|
|
FAILW(REG_ASSERT); |
644 |
|
|
break; |
645 |
|
|
} |
646 |
|
|
/* lexnest done, back up and try again */ |
647 |
|
|
v->nexttype = v->lasttype; |
648 |
|
|
return next(v); |
649 |
|
|
} |
650 |
|
|
/* otherwise, lexescape has already done the work */ |
651 |
|
|
return !ISERR(); |
652 |
|
|
} |
653 |
|
|
|
654 |
|
|
/* |
655 |
|
|
- lexescape - parse an ARE backslash escape (backslash already eaten) |
656 |
|
|
* Note slightly nonstandard use of the CCLASS type code. |
657 |
|
|
^ static int lexescape(struct vars *); |
658 |
|
|
*/ |
659 |
|
|
static int /* not actually used, but convenient for RETV */ |
660 |
|
|
lexescape(v) |
661 |
|
|
struct vars *v; |
662 |
|
|
{ |
663 |
|
|
chr c; |
664 |
|
|
static chr alert[] = { |
665 |
|
|
CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t') |
666 |
|
|
}; |
667 |
|
|
static chr esc[] = { |
668 |
|
|
CHR('E'), CHR('S'), CHR('C') |
669 |
|
|
}; |
670 |
|
|
chr *save; |
671 |
|
|
|
672 |
|
|
assert(v->cflags®_ADVF); |
673 |
|
|
|
674 |
|
|
assert(!ATEOS()); |
675 |
|
|
c = *v->now++; |
676 |
|
|
if (!iscalnum(c)) |
677 |
|
|
RETV(PLAIN, c); |
678 |
|
|
|
679 |
|
|
NOTE(REG_UNONPOSIX); |
680 |
|
|
switch (c) { |
681 |
|
|
case CHR('a'): |
682 |
|
|
RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007'))); |
683 |
|
|
break; |
684 |
|
|
case CHR('A'): |
685 |
|
|
RETV(SBEGIN, 0); |
686 |
|
|
break; |
687 |
|
|
case CHR('b'): |
688 |
|
|
RETV(PLAIN, CHR('\b')); |
689 |
|
|
break; |
690 |
|
|
case CHR('B'): |
691 |
|
|
RETV(PLAIN, CHR('\\')); |
692 |
|
|
break; |
693 |
|
|
case CHR('c'): |
694 |
|
|
NOTE(REG_UUNPORT); |
695 |
|
|
if (ATEOS()) |
696 |
|
|
FAILW(REG_EESCAPE); |
697 |
|
|
RETV(PLAIN, (chr)(*v->now++ & 037)); |
698 |
|
|
break; |
699 |
|
|
case CHR('d'): |
700 |
|
|
NOTE(REG_ULOCALE); |
701 |
|
|
RETV(CCLASS, 'd'); |
702 |
|
|
break; |
703 |
|
|
case CHR('D'): |
704 |
|
|
NOTE(REG_ULOCALE); |
705 |
|
|
RETV(CCLASS, 'D'); |
706 |
|
|
break; |
707 |
|
|
case CHR('e'): |
708 |
|
|
NOTE(REG_UUNPORT); |
709 |
|
|
RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033'))); |
710 |
|
|
break; |
711 |
|
|
case CHR('f'): |
712 |
|
|
RETV(PLAIN, CHR('\f')); |
713 |
|
|
break; |
714 |
|
|
case CHR('m'): |
715 |
|
|
RET('<'); |
716 |
|
|
break; |
717 |
|
|
case CHR('M'): |
718 |
|
|
RET('>'); |
719 |
|
|
break; |
720 |
|
|
case CHR('n'): |
721 |
|
|
RETV(PLAIN, CHR('\n')); |
722 |
|
|
break; |
723 |
|
|
case CHR('r'): |
724 |
|
|
RETV(PLAIN, CHR('\r')); |
725 |
|
|
break; |
726 |
|
|
case CHR('s'): |
727 |
|
|
NOTE(REG_ULOCALE); |
728 |
|
|
RETV(CCLASS, 's'); |
729 |
|
|
break; |
730 |
|
|
case CHR('S'): |
731 |
|
|
NOTE(REG_ULOCALE); |
732 |
|
|
RETV(CCLASS, 'S'); |
733 |
|
|
break; |
734 |
|
|
case CHR('t'): |
735 |
|
|
RETV(PLAIN, CHR('\t')); |
736 |
|
|
break; |
737 |
|
|
case CHR('u'): |
738 |
|
|
c = lexdigits(v, 16, 4, 4); |
739 |
|
|
if (ISERR()) |
740 |
|
|
FAILW(REG_EESCAPE); |
741 |
|
|
RETV(PLAIN, c); |
742 |
|
|
break; |
743 |
|
|
case CHR('U'): |
744 |
|
|
c = lexdigits(v, 16, 8, 8); |
745 |
|
|
if (ISERR()) |
746 |
|
|
FAILW(REG_EESCAPE); |
747 |
|
|
RETV(PLAIN, c); |
748 |
|
|
break; |
749 |
|
|
case CHR('v'): |
750 |
|
|
RETV(PLAIN, CHR('\v')); |
751 |
|
|
break; |
752 |
|
|
case CHR('w'): |
753 |
|
|
NOTE(REG_ULOCALE); |
754 |
|
|
RETV(CCLASS, 'w'); |
755 |
|
|
break; |
756 |
|
|
case CHR('W'): |
757 |
|
|
NOTE(REG_ULOCALE); |
758 |
|
|
RETV(CCLASS, 'W'); |
759 |
|
|
break; |
760 |
|
|
case CHR('x'): |
761 |
|
|
NOTE(REG_UUNPORT); |
762 |
|
|
c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */ |
763 |
|
|
if (ISERR()) |
764 |
|
|
FAILW(REG_EESCAPE); |
765 |
|
|
RETV(PLAIN, c); |
766 |
|
|
break; |
767 |
|
|
case CHR('y'): |
768 |
|
|
NOTE(REG_ULOCALE); |
769 |
|
|
RETV(WBDRY, 0); |
770 |
|
|
break; |
771 |
|
|
case CHR('Y'): |
772 |
|
|
NOTE(REG_ULOCALE); |
773 |
|
|
RETV(NWBDRY, 0); |
774 |
|
|
break; |
775 |
|
|
case CHR('Z'): |
776 |
|
|
RETV(SEND, 0); |
777 |
|
|
break; |
778 |
|
|
case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): |
779 |
|
|
case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): |
780 |
|
|
case CHR('9'): |
781 |
|
|
save = v->now; |
782 |
|
|
v->now--; /* put first digit back */ |
783 |
|
|
c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */ |
784 |
|
|
if (ISERR()) |
785 |
|
|
FAILW(REG_EESCAPE); |
786 |
|
|
/* ugly heuristic (first test is "exactly 1 digit?") */ |
787 |
|
|
if (v->now - save == 0 || (int)c <= v->nsubexp) { |
788 |
|
|
NOTE(REG_UBACKREF); |
789 |
|
|
RETV(BACKREF, (chr)c); |
790 |
|
|
} |
791 |
|
|
/* oops, doesn't look like it's a backref after all... */ |
792 |
|
|
v->now = save; |
793 |
|
|
/* and fall through into octal number */ |
794 |
|
|
case CHR('0'): |
795 |
|
|
NOTE(REG_UUNPORT); |
796 |
|
|
v->now--; /* put first digit back */ |
797 |
|
|
c = lexdigits(v, 8, 1, 3); |
798 |
|
|
if (ISERR()) |
799 |
|
|
FAILW(REG_EESCAPE); |
800 |
|
|
RETV(PLAIN, c); |
801 |
|
|
break; |
802 |
|
|
default: |
803 |
|
|
assert(iscalpha(c)); |
804 |
|
|
FAILW(REG_EESCAPE); /* unknown alphabetic escape */ |
805 |
|
|
break; |
806 |
|
|
} |
807 |
|
|
assert(NOTREACHED); |
808 |
|
|
} |
809 |
|
|
|
810 |
|
|
/* |
811 |
|
|
- lexdigits - slurp up digits and return chr value |
812 |
|
|
^ static chr lexdigits(struct vars *, int, int, int); |
813 |
|
|
*/ |
814 |
|
|
static chr /* chr value; errors signalled via ERR */ |
815 |
|
|
lexdigits(v, base, minlen, maxlen) |
816 |
|
|
struct vars *v; |
817 |
|
|
int base; |
818 |
|
|
int minlen; |
819 |
|
|
int maxlen; |
820 |
|
|
{ |
821 |
|
|
uchr n; /* unsigned to avoid overflow misbehavior */ |
822 |
|
|
int len; |
823 |
|
|
chr c; |
824 |
|
|
int d; |
825 |
|
|
CONST uchr ub = (uchr) base; |
826 |
|
|
|
827 |
|
|
n = 0; |
828 |
|
|
for (len = 0; len < maxlen && !ATEOS(); len++) { |
829 |
|
|
c = *v->now++; |
830 |
|
|
switch (c) { |
831 |
|
|
case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'): |
832 |
|
|
case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): |
833 |
|
|
case CHR('8'): case CHR('9'): |
834 |
|
|
d = DIGITVAL(c); |
835 |
|
|
break; |
836 |
|
|
case CHR('a'): case CHR('A'): d = 10; break; |
837 |
|
|
case CHR('b'): case CHR('B'): d = 11; break; |
838 |
|
|
case CHR('c'): case CHR('C'): d = 12; break; |
839 |
|
|
case CHR('d'): case CHR('D'): d = 13; break; |
840 |
|
|
case CHR('e'): case CHR('E'): d = 14; break; |
841 |
|
|
case CHR('f'): case CHR('F'): d = 15; break; |
842 |
|
|
default: |
843 |
|
|
v->now--; /* oops, not a digit at all */ |
844 |
|
|
d = -1; |
845 |
|
|
break; |
846 |
|
|
} |
847 |
|
|
|
848 |
|
|
if (d >= base) { /* not a plausible digit */ |
849 |
|
|
v->now--; |
850 |
|
|
d = -1; |
851 |
|
|
} |
852 |
|
|
if (d < 0) |
853 |
|
|
break; /* NOTE BREAK OUT */ |
854 |
|
|
n = n*ub + (uchr)d; |
855 |
|
|
} |
856 |
|
|
if (len < minlen) |
857 |
|
|
ERR(REG_EESCAPE); |
858 |
|
|
|
859 |
|
|
return (chr)n; |
860 |
|
|
} |
861 |
|
|
|
862 |
|
|
/* |
863 |
|
|
- brenext - get next BRE token |
864 |
|
|
* This is much like EREs except for all the stupid backslashes and the |
865 |
|
|
* context-dependency of some things. |
866 |
|
|
^ static int brenext(struct vars *, pchr); |
867 |
|
|
*/ |
868 |
|
|
static int /* 1 normal, 0 failure */ |
869 |
|
|
brenext(v, pc) |
870 |
|
|
struct vars *v; |
871 |
|
|
pchr pc; |
872 |
|
|
{ |
873 |
|
|
chr c = (chr)pc; |
874 |
|
|
|
875 |
|
|
switch (c) { |
876 |
|
|
case CHR('*'): |
877 |
|
|
if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) |
878 |
|
|
RETV(PLAIN, c); |
879 |
|
|
RET('*'); |
880 |
|
|
break; |
881 |
|
|
case CHR('['): |
882 |
|
|
if (HAVE(6) && *(v->now+0) == CHR('[') && |
883 |
|
|
*(v->now+1) == CHR(':') && |
884 |
|
|
(*(v->now+2) == CHR('<') || |
885 |
|
|
*(v->now+2) == CHR('>')) && |
886 |
|
|
*(v->now+3) == CHR(':') && |
887 |
|
|
*(v->now+4) == CHR(']') && |
888 |
|
|
*(v->now+5) == CHR(']')) { |
889 |
|
|
c = *(v->now+2); |
890 |
|
|
v->now += 6; |
891 |
|
|
NOTE(REG_UNONPOSIX); |
892 |
|
|
RET((c == CHR('<')) ? '<' : '>'); |
893 |
|
|
} |
894 |
|
|
INTOCON(L_BRACK); |
895 |
|
|
if (NEXT1('^')) { |
896 |
|
|
v->now++; |
897 |
|
|
RETV('[', 0); |
898 |
|
|
} |
899 |
|
|
RETV('[', 1); |
900 |
|
|
break; |
901 |
|
|
case CHR('.'): |
902 |
|
|
RET('.'); |
903 |
|
|
break; |
904 |
|
|
case CHR('^'): |
905 |
|
|
if (LASTTYPE(EMPTY)) |
906 |
|
|
RET('^'); |
907 |
|
|
if (LASTTYPE('(')) { |
908 |
|
|
NOTE(REG_UUNSPEC); |
909 |
|
|
RET('^'); |
910 |
|
|
} |
911 |
|
|
RETV(PLAIN, c); |
912 |
|
|
break; |
913 |
|
|
case CHR('$'): |
914 |
|
|
if (v->cflags®_EXPANDED) |
915 |
|
|
skip(v); |
916 |
|
|
if (ATEOS()) |
917 |
|
|
RET('$'); |
918 |
|
|
if (NEXT2('\\', ')')) { |
919 |
|
|
NOTE(REG_UUNSPEC); |
920 |
|
|
RET('$'); |
921 |
|
|
} |
922 |
|
|
RETV(PLAIN, c); |
923 |
|
|
break; |
924 |
|
|
case CHR('\\'): |
925 |
|
|
break; /* see below */ |
926 |
|
|
default: |
927 |
|
|
RETV(PLAIN, c); |
928 |
|
|
break; |
929 |
|
|
} |
930 |
|
|
|
931 |
|
|
assert(c == CHR('\\')); |
932 |
|
|
|
933 |
|
|
if (ATEOS()) |
934 |
|
|
FAILW(REG_EESCAPE); |
935 |
|
|
|
936 |
|
|
c = *v->now++; |
937 |
|
|
switch (c) { |
938 |
|
|
case CHR('{'): |
939 |
|
|
INTOCON(L_BBND); |
940 |
|
|
NOTE(REG_UBOUNDS); |
941 |
|
|
RET('{'); |
942 |
|
|
break; |
943 |
|
|
case CHR('('): |
944 |
|
|
RETV('(', 1); |
945 |
|
|
break; |
946 |
|
|
case CHR(')'): |
947 |
|
|
RETV(')', c); |
948 |
|
|
break; |
949 |
|
|
case CHR('<'): |
950 |
|
|
NOTE(REG_UNONPOSIX); |
951 |
|
|
RET('<'); |
952 |
|
|
break; |
953 |
|
|
case CHR('>'): |
954 |
|
|
NOTE(REG_UNONPOSIX); |
955 |
|
|
RET('>'); |
956 |
|
|
break; |
957 |
|
|
case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): |
958 |
|
|
case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): |
959 |
|
|
case CHR('9'): |
960 |
|
|
NOTE(REG_UBACKREF); |
961 |
|
|
RETV(BACKREF, (chr)DIGITVAL(c)); |
962 |
|
|
break; |
963 |
|
|
default: |
964 |
|
|
if (iscalnum(c)) { |
965 |
|
|
NOTE(REG_UBSALNUM); |
966 |
|
|
NOTE(REG_UUNSPEC); |
967 |
|
|
} |
968 |
|
|
RETV(PLAIN, c); |
969 |
|
|
break; |
970 |
|
|
} |
971 |
|
|
|
972 |
|
|
assert(NOTREACHED); |
973 |
|
|
} |
974 |
|
|
|
975 |
|
|
/* |
976 |
|
|
- skip - skip white space and comments in expanded form |
977 |
|
|
^ static VOID skip(struct vars *); |
978 |
|
|
*/ |
979 |
|
|
static VOID |
980 |
|
|
skip(v) |
981 |
|
|
struct vars *v; |
982 |
|
|
{ |
983 |
|
|
chr *start = v->now; |
984 |
|
|
|
985 |
|
|
assert(v->cflags®_EXPANDED); |
986 |
|
|
|
987 |
|
|
for (;;) { |
988 |
|
|
while (!ATEOS() && iscspace(*v->now)) |
989 |
|
|
v->now++; |
990 |
|
|
if (ATEOS() || *v->now != CHR('#')) |
991 |
|
|
break; /* NOTE BREAK OUT */ |
992 |
|
|
assert(NEXT1('#')); |
993 |
|
|
while (!ATEOS() && *v->now != CHR('\n')) |
994 |
|
|
v->now++; |
995 |
|
|
/* leave the newline to be picked up by the iscspace loop */ |
996 |
|
|
} |
997 |
|
|
|
998 |
|
|
if (v->now != start) |
999 |
|
|
NOTE(REG_UNONPOSIX); |
1000 |
|
|
} |
1001 |
|
|
|
1002 |
|
|
/* |
1003 |
|
|
- newline - return the chr for a newline |
1004 |
|
|
* This helps confine use of CHR to this source file. |
1005 |
|
|
^ static chr newline(NOPARMS); |
1006 |
|
|
*/ |
1007 |
|
|
static chr |
1008 |
|
|
newline() |
1009 |
|
|
{ |
1010 |
|
|
return CHR('\n'); |
1011 |
|
|
} |
1012 |
|
|
|
1013 |
|
|
/*
 - ch - return the chr sequence for regc_locale.c's fake collating element ch
 * This helps confine use of CHR to this source file.  Beware that the caller
 * knows how long the sequence is.
 * Only compiled when REG_DEBUG is defined.  The returned pointer refers to
 * a function-local static array, so it stays valid after the call.
 ^ #ifdef REG_DEBUG
 ^ static chr *ch(NOPARMS);
 ^ #endif
 */
#ifdef REG_DEBUG
static chr *
ch()
{
	static chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') };

	return chstr;
}
#endif

/* |
1032 |
|
|
- chrnamed - return the chr known by a given (chr string) name |
1033 |
|
|
* The code is a bit clumsy, but this routine gets only such specialized |
1034 |
|
|
* use that it hardly matters. |
1035 |
|
|
^ static chr chrnamed(struct vars *, chr *, chr *, pchr); |
1036 |
|
|
*/ |
1037 |
|
|
static chr |
1038 |
|
|
chrnamed(v, startp, endp, lastresort) |
1039 |
|
|
struct vars *v; |
1040 |
|
|
chr *startp; /* start of name */ |
1041 |
|
|
chr *endp; /* just past end of name */ |
1042 |
|
|
pchr lastresort; /* what to return if name lookup fails */ |
1043 |
|
|
{ |
1044 |
|
|
celt c; |
1045 |
|
|
int errsave; |
1046 |
|
|
int e; |
1047 |
|
|
struct cvec *cv; |
1048 |
|
|
|
1049 |
|
|
errsave = v->err; |
1050 |
|
|
v->err = 0; |
1051 |
|
|
c = element(v, startp, endp); |
1052 |
|
|
e = v->err; |
1053 |
|
|
v->err = errsave; |
1054 |
|
|
|
1055 |
|
|
if (e != 0) |
1056 |
|
|
return (chr)lastresort; |
1057 |
|
|
|
1058 |
|
|
cv = range(v, c, c, 0); |
1059 |
|
|
if (cv->nchrs == 0) |
1060 |
|
|
return (chr)lastresort; |
1061 |
|
|
return cv->chrs[0]; |
1062 |
|
|
} |
1063 |
|
|
|
1064 |
|
|
/* End of regc_lex.c */