1 |
/* $Header$ */ |
2 |
/* |
3 |
* tclScan.c -- |
4 |
* |
5 |
* This file contains the implementation of the "scan" command. |
6 |
* |
7 |
* Copyright (c) 1998 by Scriptics Corporation. |
8 |
* |
9 |
* See the file "license.terms" for information on usage and redistribution |
10 |
* of this file, and for a DISCLAIMER OF ALL WARRANTIES. |
11 |
* |
12 |
* RCS: @(#) $Id: tclscan.c,v 1.1.1.1 2001/06/13 04:46:00 dtashley Exp $ |
13 |
*/ |
14 |
|
15 |
#include "tclInt.h" |
16 |
|
17 |
/* |
18 |
* Flag values used by Tcl_ScanObjCmd. |
19 |
*/ |
20 |
|
21 |
#define SCAN_NOSKIP 0x1 /* Don't skip blanks. */ |
22 |
#define SCAN_SUPPRESS 0x2 /* Suppress assignment. */ |
23 |
#define SCAN_UNSIGNED 0x4 /* Read an unsigned value. */ |
24 |
#define SCAN_WIDTH 0x8 /* A width value was supplied. */ |
25 |
|
26 |
#define SCAN_SIGNOK 0x10 /* A +/- character is allowed. */ |
27 |
#define SCAN_NODIGITS 0x20 /* No digits have been scanned. */ |
28 |
#define SCAN_NOZERO 0x40 /* No zero digits have been scanned. */ |
29 |
#define SCAN_XOK 0x80 /* An 'x' is allowed. */ |
30 |
#define SCAN_PTOK 0x100 /* Decimal point is allowed. */ |
31 |
#define SCAN_EXPOK 0x200 /* An exponent is allowed. */ |
32 |
|
33 |
|
34 |
/* |
35 |
* The following structure contains the information associated with |
36 |
* a character set. |
37 |
*/ |
38 |
|
39 |
typedef struct CharSet { |
40 |
int exclude; /* 1 if this is an exclusion set. */ |
41 |
int nchars; |
42 |
Tcl_UniChar *chars; |
43 |
int nranges; |
44 |
struct Range { |
45 |
Tcl_UniChar start; |
46 |
Tcl_UniChar end; |
47 |
} *ranges; |
48 |
} CharSet; |
49 |
|
50 |
/* |
51 |
* Declarations for functions used only in this file. |
52 |
*/ |
53 |
|
54 |
static char * BuildCharSet _ANSI_ARGS_((CharSet *cset, char *format)); |
55 |
static int CharInSet _ANSI_ARGS_((CharSet *cset, int ch)); |
56 |
static void ReleaseCharSet _ANSI_ARGS_((CharSet *cset)); |
57 |
static int ValidateFormat _ANSI_ARGS_((Tcl_Interp *interp, char *format, |
58 |
int numVars, int *totalVars)); |
59 |
|
60 |
/* |
61 |
*---------------------------------------------------------------------- |
62 |
* |
63 |
* BuildCharSet -- |
64 |
* |
65 |
* This function examines a character set format specification |
66 |
* and builds a CharSet containing the individual characters and |
67 |
* character ranges specified. |
68 |
* |
69 |
* Results: |
70 |
* Returns the next format position. |
71 |
* |
72 |
* Side effects: |
73 |
* Initializes the charset. |
74 |
* |
75 |
*---------------------------------------------------------------------- |
76 |
*/ |
77 |
|
78 |
static char * |
79 |
BuildCharSet(cset, format) |
80 |
CharSet *cset; |
81 |
char *format; /* Points to first char of set. */ |
82 |
{ |
83 |
Tcl_UniChar ch, start; |
84 |
int offset, nranges; |
85 |
char *end; |
86 |
|
87 |
memset(cset, 0, sizeof(CharSet)); |
88 |
|
89 |
offset = Tcl_UtfToUniChar(format, &ch); |
90 |
if (ch == '^') { |
91 |
cset->exclude = 1; |
92 |
format += offset; |
93 |
offset = Tcl_UtfToUniChar(format, &ch); |
94 |
} |
95 |
end = format + offset; |
96 |
|
97 |
/* |
98 |
* Find the close bracket so we can overallocate the set. |
99 |
*/ |
100 |
|
101 |
if (ch == ']') { |
102 |
end += Tcl_UtfToUniChar(end, &ch); |
103 |
} |
104 |
nranges = 0; |
105 |
while (ch != ']') { |
106 |
if (ch == '-') { |
107 |
nranges++; |
108 |
} |
109 |
end += Tcl_UtfToUniChar(end, &ch); |
110 |
} |
111 |
|
112 |
cset->chars = (Tcl_UniChar *) ckalloc(sizeof(Tcl_UniChar) |
113 |
* (end - format - 1)); |
114 |
if (nranges > 0) { |
115 |
cset->ranges = (struct Range *) ckalloc(sizeof(struct Range)*nranges); |
116 |
} else { |
117 |
cset->ranges = NULL; |
118 |
} |
119 |
|
120 |
/* |
121 |
* Now build the character set. |
122 |
*/ |
123 |
|
124 |
cset->nchars = cset->nranges = 0; |
125 |
format += Tcl_UtfToUniChar(format, &ch); |
126 |
start = ch; |
127 |
if (ch == ']' || ch == '-') { |
128 |
cset->chars[cset->nchars++] = ch; |
129 |
format += Tcl_UtfToUniChar(format, &ch); |
130 |
} |
131 |
while (ch != ']') { |
132 |
if (*format == '-') { |
133 |
/* |
134 |
* This may be the first character of a range, so don't add |
135 |
* it yet. |
136 |
*/ |
137 |
|
138 |
start = ch; |
139 |
} else if (ch == '-') { |
140 |
/* |
141 |
* Check to see if this is the last character in the set, in which |
142 |
* case it is not a range and we should add the previous character |
143 |
* as well as the dash. |
144 |
*/ |
145 |
|
146 |
if (*format == ']') { |
147 |
cset->chars[cset->nchars++] = start; |
148 |
cset->chars[cset->nchars++] = ch; |
149 |
} else { |
150 |
format += Tcl_UtfToUniChar(format, &ch); |
151 |
|
152 |
/* |
153 |
* Check to see if the range is in reverse order. |
154 |
*/ |
155 |
|
156 |
if (start < ch) { |
157 |
cset->ranges[cset->nranges].start = start; |
158 |
cset->ranges[cset->nranges].end = ch; |
159 |
} else { |
160 |
cset->ranges[cset->nranges].start = ch; |
161 |
cset->ranges[cset->nranges].end = start; |
162 |
} |
163 |
cset->nranges++; |
164 |
} |
165 |
} else { |
166 |
cset->chars[cset->nchars++] = ch; |
167 |
} |
168 |
format += Tcl_UtfToUniChar(format, &ch); |
169 |
} |
170 |
return format; |
171 |
} |
172 |
|
173 |
/* |
174 |
*---------------------------------------------------------------------- |
175 |
* |
176 |
* CharInSet -- |
177 |
* |
178 |
* Check to see if a character matches the given set. |
179 |
* |
180 |
* Results: |
181 |
* Returns non-zero if the character matches the given set. |
182 |
* |
183 |
* Side effects: |
184 |
* None. |
185 |
* |
186 |
*---------------------------------------------------------------------- |
187 |
*/ |
188 |
|
189 |
static int |
190 |
CharInSet(cset, c) |
191 |
CharSet *cset; |
192 |
int c; /* Character to test, passed as int because |
193 |
* of non-ANSI prototypes. */ |
194 |
{ |
195 |
Tcl_UniChar ch = (Tcl_UniChar) c; |
196 |
int i, match = 0; |
197 |
for (i = 0; i < cset->nchars; i++) { |
198 |
if (cset->chars[i] == ch) { |
199 |
match = 1; |
200 |
break; |
201 |
} |
202 |
} |
203 |
if (!match) { |
204 |
for (i = 0; i < cset->nranges; i++) { |
205 |
if ((cset->ranges[i].start <= ch) |
206 |
&& (ch <= cset->ranges[i].end)) { |
207 |
match = 1; |
208 |
break; |
209 |
} |
210 |
} |
211 |
} |
212 |
return (cset->exclude ? !match : match); |
213 |
} |
214 |
|
215 |
/* |
216 |
*---------------------------------------------------------------------- |
217 |
* |
218 |
* ReleaseCharSet -- |
219 |
* |
220 |
* Free the storage associated with a character set. |
221 |
* |
222 |
* Results: |
223 |
* None. |
224 |
* |
225 |
* Side effects: |
226 |
* None. |
227 |
* |
228 |
*---------------------------------------------------------------------- |
229 |
*/ |
230 |
|
231 |
static void |
232 |
ReleaseCharSet(cset) |
233 |
CharSet *cset; |
234 |
{ |
235 |
ckfree((char *)cset->chars); |
236 |
if (cset->ranges) { |
237 |
ckfree((char *)cset->ranges); |
238 |
} |
239 |
} |
240 |
|
241 |
/* |
242 |
*---------------------------------------------------------------------- |
243 |
* |
244 |
* ValidateFormat -- |
245 |
* |
246 |
* Parse the format string and verify that it is properly formed |
247 |
* and that there are exactly enough variables on the command line. |
248 |
* |
249 |
* Results: |
250 |
* A standard Tcl result. |
251 |
* |
252 |
* Side effects: |
253 |
* May place an error in the interpreter result. |
254 |
* |
255 |
*---------------------------------------------------------------------- |
256 |
*/ |
257 |
|
258 |
static int |
259 |
ValidateFormat(interp, format, numVars, totalSubs) |
260 |
Tcl_Interp *interp; /* Current interpreter. */ |
261 |
char *format; /* The format string. */ |
262 |
int numVars; /* The number of variables passed to the |
263 |
* scan command. */ |
264 |
int *totalSubs; /* The number of variables that will be |
265 |
* required. */ |
266 |
{ |
267 |
#define STATIC_LIST_SIZE 16 |
268 |
int gotXpg, gotSequential, value, i, flags; |
269 |
char *end; |
270 |
Tcl_UniChar ch; |
271 |
int staticAssign[STATIC_LIST_SIZE]; |
272 |
int *nassign = staticAssign; |
273 |
int objIndex, xpgSize, nspace = STATIC_LIST_SIZE; |
274 |
|
275 |
/* |
276 |
* Initialize an array that records the number of times a variable |
277 |
* is assigned to by the format string. We use this to detect if |
278 |
* a variable is multiply assigned or left unassigned. |
279 |
*/ |
280 |
|
281 |
if (numVars > nspace) { |
282 |
nassign = (int*)ckalloc(sizeof(int) * numVars); |
283 |
nspace = numVars; |
284 |
} |
285 |
for (i = 0; i < nspace; i++) { |
286 |
nassign[i] = 0; |
287 |
} |
288 |
|
289 |
xpgSize = objIndex = gotXpg = gotSequential = 0; |
290 |
|
291 |
while (*format != '\0') { |
292 |
format += Tcl_UtfToUniChar(format, &ch); |
293 |
|
294 |
flags = 0; |
295 |
|
296 |
if (ch != '%') { |
297 |
continue; |
298 |
} |
299 |
format += Tcl_UtfToUniChar(format, &ch); |
300 |
if (ch == '%') { |
301 |
continue; |
302 |
} |
303 |
if (ch == '*') { |
304 |
flags |= SCAN_SUPPRESS; |
305 |
format += Tcl_UtfToUniChar(format, &ch); |
306 |
goto xpgCheckDone; |
307 |
} |
308 |
|
309 |
if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ |
310 |
/* |
311 |
* Check for an XPG3-style %n$ specification. Note: there |
312 |
* must not be a mixture of XPG3 specs and non-XPG3 specs |
313 |
* in the same format string. |
314 |
*/ |
315 |
|
316 |
value = strtoul(format-1, &end, 10); /* INTL: "C" locale. */ |
317 |
if (*end != '$') { |
318 |
goto notXpg; |
319 |
} |
320 |
format = end+1; |
321 |
format += Tcl_UtfToUniChar(format, &ch); |
322 |
gotXpg = 1; |
323 |
if (gotSequential) { |
324 |
goto mixedXPG; |
325 |
} |
326 |
objIndex = value - 1; |
327 |
if ((objIndex < 0) || (numVars && (objIndex >= numVars))) { |
328 |
goto badIndex; |
329 |
} else if (numVars == 0) { |
330 |
/* |
331 |
* In the case where no vars are specified, the user can |
332 |
* specify %9999$ legally, so we have to consider special |
333 |
* rules for growing the assign array. 'value' is |
334 |
* guaranteed to be > 0. |
335 |
*/ |
336 |
xpgSize = (xpgSize > value) ? xpgSize : value; |
337 |
} |
338 |
goto xpgCheckDone; |
339 |
} |
340 |
|
341 |
notXpg: |
342 |
gotSequential = 1; |
343 |
if (gotXpg) { |
344 |
mixedXPG: |
345 |
Tcl_SetResult(interp, |
346 |
"cannot mix \"%\" and \"%n$\" conversion specifiers", |
347 |
TCL_STATIC); |
348 |
goto error; |
349 |
} |
350 |
|
351 |
xpgCheckDone: |
352 |
/* |
353 |
* Parse any width specifier. |
354 |
*/ |
355 |
|
356 |
if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ |
357 |
value = strtoul(format-1, &format, 10); /* INTL: "C" locale. */ |
358 |
flags |= SCAN_WIDTH; |
359 |
format += Tcl_UtfToUniChar(format, &ch); |
360 |
} |
361 |
|
362 |
/* |
363 |
* Ignore size specifier. |
364 |
*/ |
365 |
|
366 |
if ((ch == 'l') || (ch == 'L') || (ch == 'h')) { |
367 |
format += Tcl_UtfToUniChar(format, &ch); |
368 |
} |
369 |
|
370 |
if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) { |
371 |
goto badIndex; |
372 |
} |
373 |
|
374 |
/* |
375 |
* Handle the various field types. |
376 |
*/ |
377 |
|
378 |
switch (ch) { |
379 |
case 'n': |
380 |
case 'd': |
381 |
case 'i': |
382 |
case 'o': |
383 |
case 'x': |
384 |
case 'u': |
385 |
case 'f': |
386 |
case 'e': |
387 |
case 'g': |
388 |
case 's': |
389 |
break; |
390 |
case 'c': |
391 |
if (flags & SCAN_WIDTH) { |
392 |
Tcl_SetResult(interp, "field width may not be specified in %c conversion", TCL_STATIC); |
393 |
goto error; |
394 |
} |
395 |
break; |
396 |
case '[': |
397 |
if (*format == '\0') { |
398 |
goto badSet; |
399 |
} |
400 |
format += Tcl_UtfToUniChar(format, &ch); |
401 |
if (ch == '^') { |
402 |
if (*format == '\0') { |
403 |
goto badSet; |
404 |
} |
405 |
format += Tcl_UtfToUniChar(format, &ch); |
406 |
} |
407 |
if (ch == ']') { |
408 |
if (*format == '\0') { |
409 |
goto badSet; |
410 |
} |
411 |
format += Tcl_UtfToUniChar(format, &ch); |
412 |
} |
413 |
while (ch != ']') { |
414 |
if (*format == '\0') { |
415 |
goto badSet; |
416 |
} |
417 |
format += Tcl_UtfToUniChar(format, &ch); |
418 |
} |
419 |
break; |
420 |
badSet: |
421 |
Tcl_SetResult(interp, "unmatched [ in format string", |
422 |
TCL_STATIC); |
423 |
goto error; |
424 |
default: |
425 |
{ |
426 |
char buf[TCL_UTF_MAX+1]; |
427 |
|
428 |
buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; |
429 |
Tcl_AppendStringsToObj(Tcl_GetObjResult(interp), |
430 |
"bad scan conversion character \"", buf, "\"", NULL); |
431 |
goto error; |
432 |
} |
433 |
} |
434 |
if (!(flags & SCAN_SUPPRESS)) { |
435 |
if (objIndex >= nspace) { |
436 |
/* |
437 |
* Expand the nassign buffer. If we are using XPG specifiers, |
438 |
* make sure that we grow to a large enough size. xpgSize is |
439 |
* guaranteed to be at least one larger than objIndex. |
440 |
*/ |
441 |
value = nspace; |
442 |
if (xpgSize) { |
443 |
nspace = xpgSize; |
444 |
} else { |
445 |
nspace += STATIC_LIST_SIZE; |
446 |
} |
447 |
if (nassign == staticAssign) { |
448 |
nassign = (void *)ckalloc(nspace * sizeof(int)); |
449 |
for (i = 0; i < STATIC_LIST_SIZE; ++i) { |
450 |
nassign[i] = staticAssign[i]; |
451 |
} |
452 |
} else { |
453 |
nassign = (void *)ckrealloc((void *)nassign, |
454 |
nspace * sizeof(int)); |
455 |
} |
456 |
for (i = value; i < nspace; i++) { |
457 |
nassign[i] = 0; |
458 |
} |
459 |
} |
460 |
nassign[objIndex]++; |
461 |
objIndex++; |
462 |
} |
463 |
} |
464 |
|
465 |
/* |
466 |
* Verify that all of the variable were assigned exactly once. |
467 |
*/ |
468 |
|
469 |
if (numVars == 0) { |
470 |
if (xpgSize) { |
471 |
numVars = xpgSize; |
472 |
} else { |
473 |
numVars = objIndex; |
474 |
} |
475 |
} |
476 |
if (totalSubs) { |
477 |
*totalSubs = numVars; |
478 |
} |
479 |
for (i = 0; i < numVars; i++) { |
480 |
if (nassign[i] > 1) { |
481 |
Tcl_SetResult(interp, "variable is assigned by multiple \"%n$\" conversion specifiers", TCL_STATIC); |
482 |
goto error; |
483 |
} else if (!xpgSize && (nassign[i] == 0)) { |
484 |
/* |
485 |
* If the space is empty, and xpgSize is 0 (means XPG wasn't |
486 |
* used, and/or numVars != 0), then too many vars were given |
487 |
*/ |
488 |
Tcl_SetResult(interp, "variable is not assigned by any conversion specifiers", TCL_STATIC); |
489 |
goto error; |
490 |
} |
491 |
} |
492 |
|
493 |
if (nassign != staticAssign) { |
494 |
ckfree((char *)nassign); |
495 |
} |
496 |
return TCL_OK; |
497 |
|
498 |
badIndex: |
499 |
if (gotXpg) { |
500 |
Tcl_SetResult(interp, "\"%n$\" argument index out of range", |
501 |
TCL_STATIC); |
502 |
} else { |
503 |
Tcl_SetResult(interp, |
504 |
"different numbers of variable names and field specifiers", |
505 |
TCL_STATIC); |
506 |
} |
507 |
|
508 |
error: |
509 |
if (nassign != staticAssign) { |
510 |
ckfree((char *)nassign); |
511 |
} |
512 |
return TCL_ERROR; |
513 |
#undef STATIC_LIST_SIZE |
514 |
} |
515 |
|
516 |
/* |
517 |
*---------------------------------------------------------------------- |
518 |
* |
519 |
* Tcl_ScanObjCmd -- |
520 |
* |
521 |
* This procedure is invoked to process the "scan" Tcl command. |
522 |
* See the user documentation for details on what it does. |
523 |
* |
524 |
* Results: |
525 |
* A standard Tcl result. |
526 |
* |
527 |
* Side effects: |
528 |
* See the user documentation. |
529 |
* |
530 |
*---------------------------------------------------------------------- |
531 |
*/ |
532 |
|
533 |
/* ARGSUSED */ |
534 |
int |
535 |
Tcl_ScanObjCmd(dummy, interp, objc, objv) |
536 |
ClientData dummy; /* Not used. */ |
537 |
Tcl_Interp *interp; /* Current interpreter. */ |
538 |
int objc; /* Number of arguments. */ |
539 |
Tcl_Obj *CONST objv[]; /* Argument objects. */ |
540 |
{ |
541 |
char *format; |
542 |
int numVars, nconversions, totalVars = -1; |
543 |
int objIndex, offset, i, value, result, code; |
544 |
char *string, *end, *baseString; |
545 |
char op = 0; |
546 |
int base = 0; |
547 |
int underflow = 0; |
548 |
size_t width; |
549 |
long (*fn)() = NULL; |
550 |
Tcl_UniChar ch, sch; |
551 |
Tcl_Obj **objs = NULL, *objPtr = NULL; |
552 |
int flags; |
553 |
char buf[513]; /* Temporary buffer to hold scanned |
554 |
* number strings before they are |
555 |
* passed to strtoul. */ |
556 |
|
557 |
if (objc < 3) { |
558 |
Tcl_WrongNumArgs(interp, 1, objv, |
559 |
"string format ?varName varName ...?"); |
560 |
return TCL_ERROR; |
561 |
} |
562 |
|
563 |
format = Tcl_GetStringFromObj(objv[2], NULL); |
564 |
numVars = objc-3; |
565 |
|
566 |
/* |
567 |
* Check for errors in the format string. |
568 |
*/ |
569 |
|
570 |
if (ValidateFormat(interp, format, numVars, &totalVars) == TCL_ERROR) { |
571 |
return TCL_ERROR; |
572 |
} |
573 |
|
574 |
/* |
575 |
* Allocate space for the result objects. |
576 |
*/ |
577 |
|
578 |
if (totalVars > 0) { |
579 |
objs = (Tcl_Obj **) ckalloc(sizeof(Tcl_Obj*) * totalVars); |
580 |
for (i = 0; i < totalVars; i++) { |
581 |
objs[i] = NULL; |
582 |
} |
583 |
} |
584 |
|
585 |
string = Tcl_GetStringFromObj(objv[1], NULL); |
586 |
baseString = string; |
587 |
|
588 |
/* |
589 |
* Iterate over the format string filling in the result objects until |
590 |
* we reach the end of input, the end of the format string, or there |
591 |
* is a mismatch. |
592 |
*/ |
593 |
|
594 |
objIndex = 0; |
595 |
nconversions = 0; |
596 |
while (*format != '\0') { |
597 |
format += Tcl_UtfToUniChar(format, &ch); |
598 |
|
599 |
flags = 0; |
600 |
|
601 |
/* |
602 |
* If we see whitespace in the format, skip whitespace in the string. |
603 |
*/ |
604 |
|
605 |
if (Tcl_UniCharIsSpace(ch)) { |
606 |
offset = Tcl_UtfToUniChar(string, &sch); |
607 |
while (Tcl_UniCharIsSpace(sch)) { |
608 |
if (*string == '\0') { |
609 |
goto done; |
610 |
} |
611 |
string += offset; |
612 |
offset = Tcl_UtfToUniChar(string, &sch); |
613 |
} |
614 |
continue; |
615 |
} |
616 |
|
617 |
if (ch != '%') { |
618 |
literal: |
619 |
if (*string == '\0') { |
620 |
underflow = 1; |
621 |
goto done; |
622 |
} |
623 |
string += Tcl_UtfToUniChar(string, &sch); |
624 |
if (ch != sch) { |
625 |
goto done; |
626 |
} |
627 |
continue; |
628 |
} |
629 |
|
630 |
format += Tcl_UtfToUniChar(format, &ch); |
631 |
if (ch == '%') { |
632 |
goto literal; |
633 |
} |
634 |
|
635 |
/* |
636 |
* Check for assignment suppression ('*') or an XPG3-style |
637 |
* assignment ('%n$'). |
638 |
*/ |
639 |
|
640 |
if (ch == '*') { |
641 |
flags |= SCAN_SUPPRESS; |
642 |
format += Tcl_UtfToUniChar(format, &ch); |
643 |
} else if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ |
644 |
value = strtoul(format-1, &end, 10); /* INTL: "C" locale. */ |
645 |
if (*end == '$') { |
646 |
format = end+1; |
647 |
format += Tcl_UtfToUniChar(format, &ch); |
648 |
objIndex = value - 1; |
649 |
} |
650 |
} |
651 |
|
652 |
/* |
653 |
* Parse any width specifier. |
654 |
*/ |
655 |
|
656 |
if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ |
657 |
width = strtoul(format-1, &format, 10); /* INTL: "C" locale. */ |
658 |
format += Tcl_UtfToUniChar(format, &ch); |
659 |
} else { |
660 |
width = 0; |
661 |
} |
662 |
|
663 |
/* |
664 |
* Ignore size specifier. |
665 |
*/ |
666 |
|
667 |
if ((ch == 'l') || (ch == 'L') || (ch == 'h')) { |
668 |
format += Tcl_UtfToUniChar(format, &ch); |
669 |
} |
670 |
|
671 |
/* |
672 |
* Handle the various field types. |
673 |
*/ |
674 |
|
675 |
switch (ch) { |
676 |
case 'n': |
677 |
if (!(flags & SCAN_SUPPRESS)) { |
678 |
objPtr = Tcl_NewIntObj(string - baseString); |
679 |
Tcl_IncrRefCount(objPtr); |
680 |
objs[objIndex++] = objPtr; |
681 |
} |
682 |
nconversions++; |
683 |
continue; |
684 |
|
685 |
case 'd': |
686 |
op = 'i'; |
687 |
base = 10; |
688 |
fn = (long (*)())strtol; |
689 |
break; |
690 |
case 'i': |
691 |
op = 'i'; |
692 |
base = 0; |
693 |
fn = (long (*)())strtol; |
694 |
break; |
695 |
case 'o': |
696 |
op = 'i'; |
697 |
base = 8; |
698 |
fn = (long (*)())strtol; |
699 |
break; |
700 |
case 'x': |
701 |
op = 'i'; |
702 |
base = 16; |
703 |
fn = (long (*)())strtol; |
704 |
break; |
705 |
case 'u': |
706 |
op = 'i'; |
707 |
base = 10; |
708 |
flags |= SCAN_UNSIGNED; |
709 |
fn = (long (*)())strtoul; |
710 |
break; |
711 |
|
712 |
case 'f': |
713 |
case 'e': |
714 |
case 'g': |
715 |
op = 'f'; |
716 |
break; |
717 |
|
718 |
case 's': |
719 |
op = 's'; |
720 |
break; |
721 |
|
722 |
case 'c': |
723 |
op = 'c'; |
724 |
flags |= SCAN_NOSKIP; |
725 |
break; |
726 |
case '[': |
727 |
op = '['; |
728 |
flags |= SCAN_NOSKIP; |
729 |
break; |
730 |
} |
731 |
|
732 |
/* |
733 |
* At this point, we will need additional characters from the |
734 |
* string to proceed. |
735 |
*/ |
736 |
|
737 |
if (*string == '\0') { |
738 |
underflow = 1; |
739 |
goto done; |
740 |
} |
741 |
|
742 |
/* |
743 |
* Skip any leading whitespace at the beginning of a field unless |
744 |
* the format suppresses this behavior. |
745 |
*/ |
746 |
|
747 |
if (!(flags & SCAN_NOSKIP)) { |
748 |
while (*string != '\0') { |
749 |
offset = Tcl_UtfToUniChar(string, &sch); |
750 |
if (!Tcl_UniCharIsSpace(sch)) { |
751 |
break; |
752 |
} |
753 |
string += offset; |
754 |
} |
755 |
if (*string == '\0') { |
756 |
underflow = 1; |
757 |
goto done; |
758 |
} |
759 |
} |
760 |
|
761 |
/* |
762 |
* Perform the requested scanning operation. |
763 |
*/ |
764 |
|
765 |
switch (op) { |
766 |
case 's': |
767 |
/* |
768 |
* Scan a string up to width characters or whitespace. |
769 |
*/ |
770 |
|
771 |
if (width == 0) { |
772 |
width = (size_t) ~0; |
773 |
} |
774 |
end = string; |
775 |
while (*end != '\0') { |
776 |
offset = Tcl_UtfToUniChar(end, &sch); |
777 |
if (Tcl_UniCharIsSpace(sch)) { |
778 |
break; |
779 |
} |
780 |
end += offset; |
781 |
if (--width == 0) { |
782 |
break; |
783 |
} |
784 |
} |
785 |
if (!(flags & SCAN_SUPPRESS)) { |
786 |
objPtr = Tcl_NewStringObj(string, end-string); |
787 |
Tcl_IncrRefCount(objPtr); |
788 |
objs[objIndex++] = objPtr; |
789 |
} |
790 |
string = end; |
791 |
break; |
792 |
|
793 |
case '[': { |
794 |
CharSet cset; |
795 |
|
796 |
if (width == 0) { |
797 |
width = (size_t) ~0; |
798 |
} |
799 |
end = string; |
800 |
|
801 |
format = BuildCharSet(&cset, format); |
802 |
while (*end != '\0') { |
803 |
offset = Tcl_UtfToUniChar(end, &sch); |
804 |
if (!CharInSet(&cset, (int)sch)) { |
805 |
break; |
806 |
} |
807 |
end += offset; |
808 |
if (--width == 0) { |
809 |
break; |
810 |
} |
811 |
} |
812 |
ReleaseCharSet(&cset); |
813 |
|
814 |
if (string == end) { |
815 |
/* |
816 |
* Nothing matched the range, stop processing |
817 |
*/ |
818 |
goto done; |
819 |
} |
820 |
if (!(flags & SCAN_SUPPRESS)) { |
821 |
objPtr = Tcl_NewStringObj(string, end-string); |
822 |
Tcl_IncrRefCount(objPtr); |
823 |
objs[objIndex++] = objPtr; |
824 |
} |
825 |
string = end; |
826 |
|
827 |
break; |
828 |
} |
829 |
case 'c': |
830 |
/* |
831 |
* Scan a single Unicode character. |
832 |
*/ |
833 |
|
834 |
string += Tcl_UtfToUniChar(string, &sch); |
835 |
if (!(flags & SCAN_SUPPRESS)) { |
836 |
objPtr = Tcl_NewIntObj((int)sch); |
837 |
Tcl_IncrRefCount(objPtr); |
838 |
objs[objIndex++] = objPtr; |
839 |
} |
840 |
break; |
841 |
|
842 |
case 'i': |
843 |
/* |
844 |
* Scan an unsigned or signed integer. |
845 |
*/ |
846 |
|
847 |
if ((width == 0) || (width > sizeof(buf) - 1)) { |
848 |
width = sizeof(buf) - 1; |
849 |
} |
850 |
flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO; |
851 |
for (end = buf; width > 0; width--) { |
852 |
switch (*string) { |
853 |
/* |
854 |
* The 0 digit has special meaning at the beginning of |
855 |
* a number. If we are unsure of the base, it |
856 |
* indicates that we are in base 8 or base 16 (if it is |
857 |
* followed by an 'x'). |
858 |
*/ |
859 |
case '0': |
860 |
if (base == 0) { |
861 |
base = 8; |
862 |
flags |= SCAN_XOK; |
863 |
} |
864 |
if (flags & SCAN_NOZERO) { |
865 |
flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS |
866 |
| SCAN_NOZERO); |
867 |
} else { |
868 |
flags &= ~(SCAN_SIGNOK | SCAN_XOK |
869 |
| SCAN_NODIGITS); |
870 |
} |
871 |
goto addToInt; |
872 |
|
873 |
case '1': case '2': case '3': case '4': |
874 |
case '5': case '6': case '7': |
875 |
if (base == 0) { |
876 |
base = 10; |
877 |
} |
878 |
flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); |
879 |
goto addToInt; |
880 |
|
881 |
case '8': case '9': |
882 |
if (base == 0) { |
883 |
base = 10; |
884 |
} |
885 |
if (base <= 8) { |
886 |
break; |
887 |
} |
888 |
flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); |
889 |
goto addToInt; |
890 |
|
891 |
case 'A': case 'B': case 'C': |
892 |
case 'D': case 'E': case 'F': |
893 |
case 'a': case 'b': case 'c': |
894 |
case 'd': case 'e': case 'f': |
895 |
if (base <= 10) { |
896 |
break; |
897 |
} |
898 |
flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); |
899 |
goto addToInt; |
900 |
|
901 |
case '+': case '-': |
902 |
if (flags & SCAN_SIGNOK) { |
903 |
flags &= ~SCAN_SIGNOK; |
904 |
goto addToInt; |
905 |
} |
906 |
break; |
907 |
|
908 |
case 'x': case 'X': |
909 |
if ((flags & SCAN_XOK) && (end == buf+1)) { |
910 |
base = 16; |
911 |
flags &= ~SCAN_XOK; |
912 |
goto addToInt; |
913 |
} |
914 |
break; |
915 |
} |
916 |
|
917 |
/* |
918 |
* We got an illegal character so we are done accumulating. |
919 |
*/ |
920 |
|
921 |
break; |
922 |
|
923 |
addToInt: |
924 |
/* |
925 |
* Add the character to the temporary buffer. |
926 |
*/ |
927 |
|
928 |
*end++ = *string++; |
929 |
if (*string == '\0') { |
930 |
break; |
931 |
} |
932 |
} |
933 |
|
934 |
/* |
935 |
* Check to see if we need to back up because we only got a |
936 |
* sign or a trailing x after a 0. |
937 |
*/ |
938 |
|
939 |
if (flags & SCAN_NODIGITS) { |
940 |
if (*string == '\0') { |
941 |
underflow = 1; |
942 |
} |
943 |
goto done; |
944 |
} else if (end[-1] == 'x' || end[-1] == 'X') { |
945 |
end--; |
946 |
string--; |
947 |
} |
948 |
|
949 |
|
950 |
/* |
951 |
* Scan the value from the temporary buffer. If we are |
952 |
* returning a large unsigned value, we have to convert it back |
953 |
* to a string since Tcl only supports signed values. |
954 |
*/ |
955 |
|
956 |
if (!(flags & SCAN_SUPPRESS)) { |
957 |
*end = '\0'; |
958 |
value = (int) (*fn)(buf, NULL, base); |
959 |
if ((flags & SCAN_UNSIGNED) && (value < 0)) { |
960 |
sprintf(buf, "%u", value); /* INTL: ISO digit */ |
961 |
objPtr = Tcl_NewStringObj(buf, -1); |
962 |
} else { |
963 |
objPtr = Tcl_NewIntObj(value); |
964 |
} |
965 |
Tcl_IncrRefCount(objPtr); |
966 |
objs[objIndex++] = objPtr; |
967 |
} |
968 |
|
969 |
break; |
970 |
|
971 |
case 'f': |
972 |
/* |
973 |
* Scan a floating point number |
974 |
*/ |
975 |
|
976 |
if ((width == 0) || (width > sizeof(buf) - 1)) { |
977 |
width = sizeof(buf) - 1; |
978 |
} |
979 |
flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK; |
980 |
for (end = buf; width > 0; width--) { |
981 |
switch (*string) { |
982 |
case '0': case '1': case '2': case '3': |
983 |
case '4': case '5': case '6': case '7': |
984 |
case '8': case '9': |
985 |
flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS); |
986 |
goto addToFloat; |
987 |
case '+': case '-': |
988 |
if (flags & SCAN_SIGNOK) { |
989 |
flags &= ~SCAN_SIGNOK; |
990 |
goto addToFloat; |
991 |
} |
992 |
break; |
993 |
case '.': |
994 |
if (flags & SCAN_PTOK) { |
995 |
flags &= ~(SCAN_SIGNOK | SCAN_PTOK); |
996 |
goto addToFloat; |
997 |
} |
998 |
break; |
999 |
case 'e': case 'E': |
1000 |
/* |
1001 |
* An exponent is not allowed until there has |
1002 |
* been at least one digit. |
1003 |
*/ |
1004 |
|
1005 |
if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) |
1006 |
== SCAN_EXPOK) { |
1007 |
flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK)) |
1008 |
| SCAN_SIGNOK | SCAN_NODIGITS; |
1009 |
goto addToFloat; |
1010 |
} |
1011 |
break; |
1012 |
} |
1013 |
|
1014 |
/* |
1015 |
* We got an illegal character so we are done accumulating. |
1016 |
*/ |
1017 |
|
1018 |
break; |
1019 |
|
1020 |
addToFloat: |
1021 |
/* |
1022 |
* Add the character to the temporary buffer. |
1023 |
*/ |
1024 |
|
1025 |
*end++ = *string++; |
1026 |
if (*string == '\0') { |
1027 |
break; |
1028 |
} |
1029 |
} |
1030 |
|
1031 |
/* |
1032 |
* Check to see if we need to back up because we saw a |
1033 |
* trailing 'e' or sign. |
1034 |
*/ |
1035 |
|
1036 |
if (flags & SCAN_NODIGITS) { |
1037 |
if (flags & SCAN_EXPOK) { |
1038 |
/* |
1039 |
* There were no digits at all so scanning has |
1040 |
* failed and we are done. |
1041 |
*/ |
1042 |
if (*string == '\0') { |
1043 |
underflow = 1; |
1044 |
} |
1045 |
goto done; |
1046 |
} |
1047 |
|
1048 |
/* |
1049 |
* We got a bad exponent ('e' and maybe a sign). |
1050 |
*/ |
1051 |
|
1052 |
end--; |
1053 |
string--; |
1054 |
if (*end != 'e' && *end != 'E') { |
1055 |
end--; |
1056 |
string--; |
1057 |
} |
1058 |
} |
1059 |
|
1060 |
/* |
1061 |
* Scan the value from the temporary buffer. |
1062 |
*/ |
1063 |
|
1064 |
if (!(flags & SCAN_SUPPRESS)) { |
1065 |
double dvalue; |
1066 |
*end = '\0'; |
1067 |
dvalue = strtod(buf, NULL); |
1068 |
objPtr = Tcl_NewDoubleObj(dvalue); |
1069 |
Tcl_IncrRefCount(objPtr); |
1070 |
objs[objIndex++] = objPtr; |
1071 |
} |
1072 |
break; |
1073 |
} |
1074 |
nconversions++; |
1075 |
} |
1076 |
|
1077 |
done: |
1078 |
result = 0; |
1079 |
code = TCL_OK; |
1080 |
|
1081 |
if (numVars) { |
1082 |
/* |
1083 |
* In this case, variables were specified (classic scan) |
1084 |
*/ |
1085 |
for (i = 0; i < totalVars; i++) { |
1086 |
if (objs[i] != NULL) { |
1087 |
result++; |
1088 |
if (Tcl_ObjSetVar2(interp, objv[i+3], NULL, |
1089 |
objs[i], 0) == NULL) { |
1090 |
Tcl_AppendStringsToObj(Tcl_GetObjResult(interp), |
1091 |
"couldn't set variable \"", |
1092 |
Tcl_GetString(objv[i+3]), "\"", (char *) NULL); |
1093 |
code = TCL_ERROR; |
1094 |
} |
1095 |
Tcl_DecrRefCount(objs[i]); |
1096 |
} |
1097 |
} |
1098 |
} else { |
1099 |
/* |
1100 |
* Here no vars were specified, we want a list returned (inline scan) |
1101 |
*/ |
1102 |
objPtr = Tcl_NewObj(); |
1103 |
for (i = 0; i < totalVars; i++) { |
1104 |
if (objs[i] != NULL) { |
1105 |
Tcl_ListObjAppendElement(NULL, objPtr, objs[i]); |
1106 |
Tcl_DecrRefCount(objs[i]); |
1107 |
} else { |
1108 |
/* |
1109 |
* More %-specifiers than matching chars, so we |
1110 |
* just spit out empty strings for these |
1111 |
*/ |
1112 |
Tcl_ListObjAppendElement(NULL, objPtr, Tcl_NewObj()); |
1113 |
} |
1114 |
} |
1115 |
} |
1116 |
ckfree((char*) objs); |
1117 |
if (code == TCL_OK) { |
1118 |
if (underflow && (nconversions == 0)) { |
1119 |
if (numVars) { |
1120 |
objPtr = Tcl_NewIntObj(-1); |
1121 |
} else { |
1122 |
if (objPtr) { |
1123 |
Tcl_SetListObj(objPtr, 0, NULL); |
1124 |
} else { |
1125 |
objPtr = Tcl_NewObj(); |
1126 |
} |
1127 |
} |
1128 |
} else if (numVars) { |
1129 |
objPtr = Tcl_NewIntObj(result); |
1130 |
} |
1131 |
Tcl_SetObjResult(interp, objPtr); |
1132 |
} |
1133 |
return code; |
1134 |
} |
1135 |
|
1136 |
/* End of tclscan.c */ |