/[dtapublic]/projs/trunk/shared_source/tcl_base/tclregexp.c
ViewVC logotype

Contents of /projs/trunk/shared_source/tcl_base/tclregexp.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 42 - (show annotations) (download)
Fri Oct 14 01:50:00 2016 UTC (7 years, 5 months ago) by dashley
File MIME type: text/plain
File size: 31427 byte(s)
Move shared source code to commonize.
1 /* $Header: /cvsroot/esrg/sfesrg/esrgpcpj/shared/tcl_base/tclregexp.c,v 1.1.1.1 2001/06/13 04:45:39 dtashley Exp $ */
2
3 /*
4 * tclRegexp.c --
5 *
6 * This file contains the public interfaces to the Tcl regular
7 * expression mechanism.
8 *
9 * Copyright (c) 1998 by Sun Microsystems, Inc.
10 * Copyright (c) 1998-1999 by Scriptics Corporation.
11 *
12 * See the file "license.terms" for information on usage and redistribution
13 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
14 *
15 * RCS: @(#) $Id: tclregexp.c,v 1.1.1.1 2001/06/13 04:45:39 dtashley Exp $
16 */
17
18 #include "tclInt.h"
19 #include "tclPort.h"
20 #include "tclRegexp.h"
21
22 /*
23 *----------------------------------------------------------------------
24 * The routines in this file use Henry Spencer's regular expression
25 * package contained in the following additional source files:
26 *
27 * regc_color.c regc_cvec.c regc_lex.c
28 * regc_nfa.c regcomp.c regcustom.h
29 * rege_dfa.c regerror.c regerrs.h
30 * regex.h regexec.c regfree.c
31 * regfronts.c regguts.h
32 *
33 * Copyright (c) 1998 Henry Spencer. All rights reserved.
34 *
35 * Development of this software was funded, in part, by Cray Research Inc.,
36 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
37 * Corporation, none of whom are responsible for the results. The author
38 * thanks all of them.
39 *
40 * Redistribution and use in source and binary forms -- with or without
41 * modification -- are permitted for any purpose, provided that
42 * redistributions in source form retain this entire copyright notice and
43 * indicate the origin and nature of any modifications.
44 *
45 * I'd appreciate being given credit for this package in the documentation
46 * of software which uses it, but that is not a requirement.
47 *
48 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
49 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
50 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
51 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
52 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
53 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
54 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
55 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
56 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
57 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
58 *
59 * *** NOTE: this code has been altered slightly for use in Tcl: ***
60 * *** 1. Names have been changed, e.g. from re_comp to ***
61 * *** TclRegComp, to avoid clashes with other ***
62 * *** regexp implementations used by applications. ***
63 */
64
65 /*
66 * Thread local storage used to maintain a per-thread cache of compiled
67 * regular expressions.
68 */
69
70 #define NUM_REGEXPS 30
71
72 typedef struct ThreadSpecificData {
73 int initialized; /* Set to 1 when the module is initialized. */
74 char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled
75 * regular expression patterns. NULL
76 * means that this slot isn't used.
77 * Malloc-ed. */
78 int patLengths[NUM_REGEXPS];/* Number of non-null characters in
79 * corresponding entry in patterns.
80 * -1 means entry isn't used. */
81 struct TclRegexp *regexps[NUM_REGEXPS];
82 /* Compiled forms of above strings. Also
83 * malloc-ed, or NULL if not in use yet. */
84 } ThreadSpecificData;
85
86 static Tcl_ThreadDataKey dataKey;
87
88 /*
89 * Declarations for functions used only in this file.
90 */
91
92 static TclRegexp * CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp,
93 char *pattern, int length, int flags));
94 static void DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
95 Tcl_Obj *copyPtr));
96 static void FinalizeRegexp _ANSI_ARGS_((ClientData clientData));
97 static void FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr));
98 static void FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
99 static int RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
100 Tcl_RegExp re, CONST Tcl_UniChar *uniString,
101 int numChars, int nmatches, int flags));
102 static int SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp,
103 Tcl_Obj *objPtr));
104
105 /*
106 * The regular expression Tcl object type. This serves as a cache
107 * of the compiled form of the regular expression.
108 */
109
110 Tcl_ObjType tclRegexpType = {
111 "regexp", /* name */
112 FreeRegexpInternalRep, /* freeIntRepProc */
113 DupRegexpInternalRep, /* dupIntRepProc */
114 NULL, /* updateStringProc */
115 SetRegexpFromAny /* setFromAnyProc */
116 };
117
118
119 /*
120 *----------------------------------------------------------------------
121 *
122 * Tcl_RegExpCompile --
123 *
124 * Compile a regular expression into a form suitable for fast
125 * matching. This procedure is DEPRECATED in favor of the
126 * object version of the command.
127 *
128 * Results:
129 * The return value is a pointer to the compiled form of string,
130 * suitable for passing to Tcl_RegExpExec. This compiled form
131 * is only valid up until the next call to this procedure, so
132 * don't keep these around for a long time! If an error occurred
133 * while compiling the pattern, then NULL is returned and an error
134 * message is left in the interp's result.
135 *
136 * Side effects:
137 * Updates the cache of compiled regexps.
138 *
139 *----------------------------------------------------------------------
140 */
141
142 Tcl_RegExp
143 Tcl_RegExpCompile(interp, string)
144 Tcl_Interp *interp; /* For use in error reporting and
145 * to access the interp regexp cache. */
146 char *string; /* String for which to produce
147 * compiled regular expression. */
148 {
149 return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string),
150 REG_ADVANCED);
151 }
152
153 /*
154 *----------------------------------------------------------------------
155 *
156 * Tcl_RegExpExec --
157 *
158 * Execute the regular expression matcher using a compiled form
159 * of a regular expression and save information about any match
160 * that is found.
161 *
162 * Results:
163 * If an error occurs during the matching operation then -1
164 * is returned and the interp's result contains an error message.
165 * Otherwise the return value is 1 if a matching range is
166 * found and 0 if there is no matching range.
167 *
168 * Side effects:
169 * None.
170 *
171 *----------------------------------------------------------------------
172 */
173
174 int
175 Tcl_RegExpExec(interp, re, string, start)
176 Tcl_Interp *interp; /* Interpreter to use for error reporting. */
177 Tcl_RegExp re; /* Compiled regular expression; must have
178 * been returned by previous call to
179 * Tcl_GetRegExpFromObj. */
180 CONST char *string; /* String against which to match re. */
181 CONST char *start; /* If string is part of a larger string,
182 * this identifies beginning of larger
183 * string, so that "^" won't match. */
184 {
185 int flags, result, numChars;
186 TclRegexp *regexp = (TclRegexp *)re;
187 Tcl_DString ds;
188 Tcl_UniChar *ustr;
189
190 /*
191 * If the starting point is offset from the beginning of the buffer,
192 * then we need to tell the regexp engine not to match "^".
193 */
194
195 if (string > start) {
196 flags = REG_NOTBOL;
197 } else {
198 flags = 0;
199 }
200
201 /*
202 * Remember the string for use by Tcl_RegExpRange().
203 */
204
205 regexp->string = string;
206 regexp->objPtr = NULL;
207
208 /*
209 * Convert the string to Unicode and perform the match.
210 */
211
212 Tcl_DStringInit(&ds);
213 ustr = Tcl_UtfToUniCharDString(string, -1, &ds);
214 numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar);
215 result = RegExpExecUniChar(interp, re, ustr, numChars,
216 -1 /* nmatches */, flags);
217 Tcl_DStringFree(&ds);
218
219 return result;
220 }
221
222 /*
223 *---------------------------------------------------------------------------
224 *
225 * Tcl_RegExpRange --
226 *
227 * Returns pointers describing the range of a regular expression match,
228 * or one of the subranges within the match.
229 *
230 * Results:
231 * The variables at *startPtr and *endPtr are modified to hold the
232 * addresses of the endpoints of the range given by index. If the
233 * specified range doesn't exist then NULLs are returned.
234 *
235 * Side effects:
236 * None.
237 *
238 *---------------------------------------------------------------------------
239 */
240
241 void
242 Tcl_RegExpRange(re, index, startPtr, endPtr)
243 Tcl_RegExp re; /* Compiled regular expression that has
244 * been passed to Tcl_RegExpExec. */
245 int index; /* 0 means give the range of the entire
246 * match, > 0 means give the range of
247 * a matching subrange. */
248 char **startPtr; /* Store address of first character in
249 * (sub-) range here. */
250 char **endPtr; /* Store address of character just after last
251 * in (sub-) range here. */
252 {
253 TclRegexp *regexpPtr = (TclRegexp *) re;
254 CONST char *string;
255
256 if ((size_t) index > regexpPtr->re.re_nsub) {
257 *startPtr = *endPtr = NULL;
258 } else if (regexpPtr->matches[index].rm_so < 0) {
259 *startPtr = *endPtr = NULL;
260 } else {
261 if (regexpPtr->objPtr) {
262 string = Tcl_GetString(regexpPtr->objPtr);
263 } else {
264 string = regexpPtr->string;
265 }
266 *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);
267 *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);
268 }
269 }
270
271 /*
272 *---------------------------------------------------------------------------
273 *
274 * RegExpExecUniChar --
275 *
276 * Execute the regular expression matcher using a compiled form of a
277 * regular expression and save information about any match that is
278 * found.
279 *
280 * Results:
281 * If an error occurs during the matching operation then -1 is
282 * returned and an error message is left in interp's result.
283 * Otherwise the return value is 1 if a matching range was found or
284 * 0 if there was no matching range.
285 *
286 * Side effects:
287 * None.
288 *
289 *----------------------------------------------------------------------
290 */
291
292 static int
293 RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
294 Tcl_Interp *interp; /* Interpreter to use for error reporting. */
295 Tcl_RegExp re; /* Compiled regular expression; returned by
296 * a previous call to Tcl_GetRegExpFromObj */
297 CONST Tcl_UniChar *wString; /* String against which to match re. */
298 int numChars; /* Length of Tcl_UniChar string (must
299 * be >= 0). */
300 int nmatches; /* How many subexpression matches (counting
301 * the whole match as subexpression 0) are
302 * of interest. -1 means "don't know". */
303 int flags; /* Regular expression flags. */
304 {
305 int status;
306 TclRegexp *regexpPtr = (TclRegexp *) re;
307 size_t last = regexpPtr->re.re_nsub + 1;
308 size_t nm = last;
309
310 if (nmatches >= 0 && (size_t) nmatches < nm) {
311 nm = (size_t) nmatches;
312 }
313
314 status = TclReExec(&regexpPtr->re, wString, (size_t) numChars,
315 &regexpPtr->details, nm, regexpPtr->matches, flags);
316
317 /*
318 * Check for errors.
319 */
320
321 if (status != REG_OKAY) {
322 if (status == REG_NOMATCH) {
323 return 0;
324 }
325 if (interp != NULL) {
326 TclRegError(interp, "error while matching regular expression: ",
327 status);
328 }
329 return -1;
330 }
331 return 1;
332 }
333
334 /*
335 *---------------------------------------------------------------------------
336 *
337 * TclRegExpRangeUniChar --
338 *
339 * Returns pointers describing the range of a regular expression match,
340 * or one of the subranges within the match, or the hypothetical range
341 * represented by the rm_extend field of the rm_detail_t.
342 *
343 * Results:
344 * The variables at *startPtr and *endPtr are modified to hold the
345 * offsets of the endpoints of the range given by index. If the
346 * specified range doesn't exist then -1s are supplied.
347 *
348 * Side effects:
349 * None.
350 *
351 *---------------------------------------------------------------------------
352 */
353
354 void
355 TclRegExpRangeUniChar(re, index, startPtr, endPtr)
356 Tcl_RegExp re; /* Compiled regular expression that has
357 * been passed to Tcl_RegExpExec. */
358 int index; /* 0 means give the range of the entire
359 * match, > 0 means give the range of
360 * a matching subrange, -1 means the
361 * range of the rm_extend field. */
362 int *startPtr; /* Store address of first character in
363 * (sub-) range here. */
364 int *endPtr; /* Store address of character just after last
365 * in (sub-) range here. */
366 {
367 TclRegexp *regexpPtr = (TclRegexp *) re;
368
369 if ((regexpPtr->flags&REG_EXPECT) && index == -1) {
370 *startPtr = regexpPtr->details.rm_extend.rm_so;
371 *endPtr = regexpPtr->details.rm_extend.rm_eo;
372 } else if ((size_t) index > regexpPtr->re.re_nsub) {
373 *startPtr = -1;
374 *endPtr = -1;
375 } else {
376 *startPtr = regexpPtr->matches[index].rm_so;
377 *endPtr = regexpPtr->matches[index].rm_eo;
378 }
379 }
380
381 /*
382 *----------------------------------------------------------------------
383 *
384 * Tcl_RegExpMatch --
385 *
386 * See if a string matches a regular expression.
387 *
388 * Results:
389 * If an error occurs during the matching operation then -1
390 * is returned and the interp's result contains an error message.
391 * Otherwise the return value is 1 if "string" matches "pattern"
392 * and 0 otherwise.
393 *
394 * Side effects:
395 * None.
396 *
397 *----------------------------------------------------------------------
398 */
399
400 int
401 Tcl_RegExpMatch(interp, string, pattern)
402 Tcl_Interp *interp; /* Used for error reporting. May be NULL. */
403 char *string; /* String. */
404 char *pattern; /* Regular expression to match against
405 * string. */
406 {
407 Tcl_RegExp re;
408
409 re = Tcl_RegExpCompile(interp, pattern);
410 if (re == NULL) {
411 return -1;
412 }
413 return Tcl_RegExpExec(interp, re, string, string);
414 }
415
416 /*
417 *----------------------------------------------------------------------
418 *
419 * Tcl_RegExpExecObj --
420 *
421 * Execute a precompiled regexp against the given object.
422 *
423 * Results:
424 * If an error occurs during the matching operation then -1
425 * is returned and the interp's result contains an error message.
426 * Otherwise the return value is 1 if "string" matches "pattern"
427 * and 0 otherwise.
428 *
429 * Side effects:
430 * Converts the object to a Unicode object.
431 *
432 *----------------------------------------------------------------------
433 */
434
435 int
436 Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags)
437 Tcl_Interp *interp; /* Interpreter to use for error reporting. */
438 Tcl_RegExp re; /* Compiled regular expression; must have
439 * been returned by previous call to
440 * Tcl_GetRegExpFromObj. */
441 Tcl_Obj *objPtr; /* String against which to match re. */
442 int offset; /* Character index that marks where matching
443 * should begin. */
444 int nmatches; /* How many subexpression matches (counting
445 * the whole match as subexpression 0) are
446 * of interest. -1 means all of them. */
447 int flags; /* Regular expression execution flags. */
448 {
449 TclRegexp *regexpPtr = (TclRegexp *) re;
450 Tcl_UniChar *udata;
451 int length;
452
453 /*
454 * Save the target object so we can extract strings from it later.
455 */
456
457 regexpPtr->string = NULL;
458 regexpPtr->objPtr = objPtr;
459
460 udata = Tcl_GetUnicode(objPtr);
461 length = Tcl_GetCharLength(objPtr);
462
463 if (offset > length) {
464 offset = length;
465 }
466 udata += offset;
467 length -= offset;
468
469 return RegExpExecUniChar(interp, re, udata, length, nmatches, flags);
470 }
471
472 /*
473 *----------------------------------------------------------------------
474 *
475 * Tcl_RegExpMatchObj --
476 *
477 * See if an object matches a regular expression.
478 *
479 * Results:
480 * If an error occurs during the matching operation then -1
481 * is returned and the interp's result contains an error message.
482 * Otherwise the return value is 1 if "string" matches "pattern"
483 * and 0 otherwise.
484 *
485 * Side effects:
486 * Changes the internal rep of the pattern and string objects.
487 *
488 *----------------------------------------------------------------------
489 */
490
491 int
492 Tcl_RegExpMatchObj(interp, stringObj, patternObj)
493 Tcl_Interp *interp; /* Used for error reporting. May be NULL. */
494 Tcl_Obj *stringObj; /* Object containing the String to search. */
495 Tcl_Obj *patternObj; /* Regular expression to match against
496 * string. */
497 {
498 Tcl_RegExp re;
499
500 re = Tcl_GetRegExpFromObj(interp, patternObj,
501 TCL_REG_ADVANCED | TCL_REG_NOSUB);
502 if (re == NULL) {
503 return -1;
504 }
505 return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */,
506 0 /* nmatches */, 0 /* flags */);
507 }
508
509 /*
510 *----------------------------------------------------------------------
511 *
512 * Tcl_RegExpGetInfo --
513 *
514 * Retrieve information about the current match.
515 *
516 * Results:
517 * None.
518 *
519 * Side effects:
520 * None.
521 *
522 *----------------------------------------------------------------------
523 */
524
525 void
526 Tcl_RegExpGetInfo(regexp, infoPtr)
527 Tcl_RegExp regexp; /* Pattern from which to get subexpressions. */
528 Tcl_RegExpInfo *infoPtr; /* Match information is stored here. */
529 {
530 TclRegexp *regexpPtr = (TclRegexp *) regexp;
531
532 infoPtr->nsubs = regexpPtr->re.re_nsub;
533 infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;
534 infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;
535 }
536
537 /*
538 *----------------------------------------------------------------------
539 *
540 * Tcl_GetRegExpFromObj --
541 *
542 * Compile a regular expression into a form suitable for fast
543 * matching. This procedure caches the result in a Tcl_Obj.
544 *
545 * Results:
546 * The return value is a pointer to the compiled form of string,
547 * suitable for passing to Tcl_RegExpExec. If an error occurred
548 * while compiling the pattern, then NULL is returned and an error
549 * message is left in the interp's result.
550 *
551 * Side effects:
552 * Updates the native rep of the Tcl_Obj.
553 *
554 *----------------------------------------------------------------------
555 */
556
557 Tcl_RegExp
558 Tcl_GetRegExpFromObj(interp, objPtr, flags)
559 Tcl_Interp *interp; /* For use in error reporting, and to access
560 * the interp regexp cache. */
561 Tcl_Obj *objPtr; /* Object whose string rep contains regular
562 * expression pattern. Internal rep will be
563 * changed to compiled form of this regular
564 * expression. */
565 int flags; /* Regular expression compilation flags. */
566 {
567 int length;
568 Tcl_ObjType *typePtr;
569 TclRegexp *regexpPtr;
570 char *pattern;
571
572 typePtr = objPtr->typePtr;
573 regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
574
575 if ((typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) {
576 pattern = Tcl_GetStringFromObj(objPtr, &length);
577
578 regexpPtr = CompileRegexp(interp, pattern, length, flags);
579 if (regexpPtr == NULL) {
580 return NULL;
581 }
582
583 /*
584 * Add a reference to the regexp so it will persist even if it is
585 * pushed out of the current thread's regexp cache. This reference
586 * will be removed when the object's internal rep is freed.
587 */
588
589 regexpPtr->refCount++;
590
591 /*
592 * Free the old representation and set our type.
593 */
594
595 if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) {
596 (*typePtr->freeIntRepProc)(objPtr);
597 }
598 objPtr->internalRep.otherValuePtr = (VOID *) regexpPtr;
599 objPtr->typePtr = &tclRegexpType;
600 }
601 return (Tcl_RegExp) regexpPtr;
602 }
603
604 /*
605 *----------------------------------------------------------------------
606 *
607 * TclRegAbout --
608 *
609 * Return information about a compiled regular expression.
610 *
611 * Results:
612 * The return value is -1 for failure, 0 for success, although at
613 * the moment there's nothing that could fail. On success, a list
614 * is left in the interp's result: first element is the subexpression
615 * count, second is a list of re_info bit names.
616 *
617 * Side effects:
618 * None.
619 *
620 *----------------------------------------------------------------------
621 */
622
623 int
624 TclRegAbout(interp, re)
625 Tcl_Interp *interp; /* For use in variable assignment. */
626 Tcl_RegExp re; /* The compiled regular expression. */
627 {
628 TclRegexp *regexpPtr = (TclRegexp *)re;
629 char buf[TCL_INTEGER_SPACE];
630 static struct infoname {
631 int bit;
632 char *text;
633 } infonames[] = {
634 {REG_UBACKREF, "REG_UBACKREF"},
635 {REG_ULOOKAHEAD, "REG_ULOOKAHEAD"},
636 {REG_UBOUNDS, "REG_UBOUNDS"},
637 {REG_UBRACES, "REG_UBRACES"},
638 {REG_UBSALNUM, "REG_UBSALNUM"},
639 {REG_UPBOTCH, "REG_UPBOTCH"},
640 {REG_UBBS, "REG_UBBS"},
641 {REG_UNONPOSIX, "REG_UNONPOSIX"},
642 {REG_UUNSPEC, "REG_UUNSPEC"},
643 {REG_UUNPORT, "REG_UUNPORT"},
644 {REG_ULOCALE, "REG_ULOCALE"},
645 {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
646 {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
647 {REG_USHORTEST, "REG_USHORTEST"},
648 {0, ""}
649 };
650 struct infoname *inf;
651 int n;
652
653 Tcl_ResetResult(interp);
654
655 sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub));
656 Tcl_AppendElement(interp, buf);
657
658 /*
659 * Must count bits before generating list, because we must know
660 * whether {} are needed before we start appending names.
661 */
662 n = 0;
663 for (inf = infonames; inf->bit != 0; inf++) {
664 if (regexpPtr->re.re_info&inf->bit) {
665 n++;
666 }
667 }
668 if (n != 1) {
669 Tcl_AppendResult(interp, " {", NULL);
670 }
671 for (inf = infonames; inf->bit != 0; inf++) {
672 if (regexpPtr->re.re_info&inf->bit) {
673 Tcl_AppendElement(interp, inf->text);
674 }
675 }
676 if (n != 1) {
677 Tcl_AppendResult(interp, "}", NULL);
678 }
679
680 return 0;
681 }
682
683 /*
684 *----------------------------------------------------------------------
685 *
686 * TclRegError --
687 *
688 * Generate an error message based on the regexp status code.
689 *
690 * Results:
691 * Places an error in the interpreter.
692 *
693 * Side effects:
694 * Sets errorCode as well.
695 *
696 *----------------------------------------------------------------------
697 */
698
699 void
700 TclRegError(interp, msg, status)
701 Tcl_Interp *interp; /* Interpreter for error reporting. */
702 char *msg; /* Message to prepend to error. */
703 int status; /* Status code to report. */
704 {
705 char buf[100]; /* ample in practice */
706 char cbuf[100]; /* lots in practice */
707 size_t n;
708 char *p;
709
710 Tcl_ResetResult(interp);
711 n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf));
712 p = (n > sizeof(buf)) ? "..." : "";
713 Tcl_AppendResult(interp, msg, buf, p, NULL);
714
715 sprintf(cbuf, "%d", status);
716 (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
717 Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
718 }
719
720
721 /*
722 *----------------------------------------------------------------------
723 *
724 * FreeRegexpInternalRep --
725 *
726 * Deallocate the storage associated with a regexp object's internal
727 * representation.
728 *
729 * Results:
730 * None.
731 *
732 * Side effects:
733 * Frees the compiled regular expression.
734 *
735 *----------------------------------------------------------------------
736 */
737
738 static void
739 FreeRegexpInternalRep(objPtr)
740 Tcl_Obj *objPtr; /* Regexp object with internal rep to free. */
741 {
742 TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
743
744 /*
745 * If this is the last reference to the regexp, free it.
746 */
747
748 if (--(regexpRepPtr->refCount) <= 0) {
749 FreeRegexp(regexpRepPtr);
750 }
751 }
752
753 /*
754 *----------------------------------------------------------------------
755 *
756 * DupRegexpInternalRep --
757 *
758 * We copy the reference to the compiled regexp and bump its
759 * reference count.
760 *
761 * Results:
762 * None.
763 *
764 * Side effects:
765 * Increments the reference count of the regexp.
766 *
767 *----------------------------------------------------------------------
768 */
769
770 static void
771 DupRegexpInternalRep(srcPtr, copyPtr)
772 Tcl_Obj *srcPtr; /* Object with internal rep to copy. */
773 Tcl_Obj *copyPtr; /* Object with internal rep to set. */
774 {
775 TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr;
776 regexpPtr->refCount++;
777 copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr;
778 copyPtr->typePtr = &tclRegexpType;
779 }
780
781 /*
782 *----------------------------------------------------------------------
783 *
784 * SetRegexpFromAny --
785 *
786 * Attempt to generate a compiled regular expression for the Tcl object
787 * "objPtr".
788 *
789 * Results:
790 * The return value is TCL_OK or TCL_ERROR. If an error occurs during
791 * conversion, an error message is left in the interpreter's result
792 * unless "interp" is NULL.
793 *
794 * Side effects:
795 * If no error occurs, a regular expression is stored as "objPtr"s
796 * internal representation.
797 *
798 *----------------------------------------------------------------------
799 */
800
801 static int
802 SetRegexpFromAny(interp, objPtr)
803 Tcl_Interp *interp; /* Used for error reporting if not NULL. */
804 Tcl_Obj *objPtr; /* The object to convert. */
805 {
806 if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) {
807 return TCL_ERROR;
808 }
809 return TCL_OK;
810 }
811
812 /*
813 *---------------------------------------------------------------------------
814 *
815 * CompileRegexp --
816 *
817 * Attempt to compile the given regexp pattern. If the compiled
818 * regular expression can be found in the per-thread cache, it
819 * will be used instead of compiling a new copy.
820 *
821 * Results:
822 * The return value is a pointer to a newly allocated TclRegexp
823 * that represents the compiled pattern, or NULL if the pattern
824 * could not be compiled. If NULL is returned, an error message is
825 * left in the interp's result.
826 *
827 * Side effects:
828 * The thread-local regexp cache is updated and a new TclRegexp may
829 * be allocated.
830 *
831 *----------------------------------------------------------------------
832 */
833
834 static TclRegexp *
835 CompileRegexp(interp, string, length, flags)
836 Tcl_Interp *interp; /* Used for error reporting if not NULL. */
837 char *string; /* The regexp to compile (UTF-8). */
838 int length; /* The length of the string in bytes. */
839 int flags; /* Compilation flags. */
840 {
841 TclRegexp *regexpPtr;
842 Tcl_UniChar *uniString;
843 int numChars;
844 Tcl_DString stringBuf;
845 int status, i;
846 ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
847
848 if (!tsdPtr->initialized) {
849 tsdPtr->initialized = 1;
850 Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL);
851 }
852
853 /*
854 * This routine maintains a second-level regular expression cache in
855 * addition to the per-object regexp cache. The per-thread cache is needed
856 * to handle the case where for various reasons the object is lost between
857 * invocations of the regexp command, but the literal pattern is the same.
858 */
859
860 /*
861 * Check the per-thread compiled regexp cache. We can only reuse
862 * a regexp if it has the same pattern and the same flags.
863 */
864
865 for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
866 if ((length == tsdPtr->patLengths[i])
867 && (tsdPtr->regexps[i]->flags == flags)
868 && (strcmp(string, tsdPtr->patterns[i]) == 0)) {
869 /*
870 * Move the matched pattern to the first slot in the
871 * cache and shift the other patterns down one position.
872 */
873
874 if (i != 0) {
875 int j;
876 char *cachedString;
877
878 cachedString = tsdPtr->patterns[i];
879 regexpPtr = tsdPtr->regexps[i];
880 for (j = i-1; j >= 0; j--) {
881 tsdPtr->patterns[j+1] = tsdPtr->patterns[j];
882 tsdPtr->patLengths[j+1] = tsdPtr->patLengths[j];
883 tsdPtr->regexps[j+1] = tsdPtr->regexps[j];
884 }
885 tsdPtr->patterns[0] = cachedString;
886 tsdPtr->patLengths[0] = length;
887 tsdPtr->regexps[0] = regexpPtr;
888 }
889 return tsdPtr->regexps[0];
890 }
891 }
892
893 /*
894 * This is a new expression, so compile it and add it to the cache.
895 */
896
897 regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
898 regexpPtr->objPtr = NULL;
899 regexpPtr->string = NULL;
900 regexpPtr->details.rm_extend.rm_so = -1;
901 regexpPtr->details.rm_extend.rm_eo = -1;
902
903 /*
904 * Get the up-to-date string representation and map to unicode.
905 */
906
907 Tcl_DStringInit(&stringBuf);
908 uniString = Tcl_UtfToUniCharDString(string, length, &stringBuf);
909 numChars = Tcl_DStringLength(&stringBuf) / sizeof(Tcl_UniChar);
910
911 /*
912 * Compile the string and check for errors.
913 */
914
915 regexpPtr->flags = flags;
916 status = TclReComp(&regexpPtr->re, uniString, (size_t) numChars, flags);
917 Tcl_DStringFree(&stringBuf);
918
919 if (status != REG_OKAY) {
920 /*
921 * Clean up and report errors in the interpreter, if possible.
922 */
923
924 ckfree((char *)regexpPtr);
925 if (interp) {
926 TclRegError(interp,
927 "couldn't compile regular expression pattern: ",
928 status);
929 }
930 return NULL;
931 }
932
933 /*
934 * Allocate enough space for all of the subexpressions, plus one
935 * extra for the entire pattern.
936 */
937
938 regexpPtr->matches = (regmatch_t *) ckalloc(
939 sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
940
941 /*
942 * Initialize the refcount to one initially, since it is in the cache.
943 */
944
945 regexpPtr->refCount = 1;
946
947 /*
948 * Free the last regexp, if necessary, and make room at the head of the
949 * list for the new regexp.
950 */
951
952 if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) {
953 TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1];
954 if (--(oldRegexpPtr->refCount) <= 0) {
955 FreeRegexp(oldRegexpPtr);
956 }
957 ckfree(tsdPtr->patterns[NUM_REGEXPS-1]);
958 }
959 for (i = NUM_REGEXPS - 2; i >= 0; i--) {
960 tsdPtr->patterns[i+1] = tsdPtr->patterns[i];
961 tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i];
962 tsdPtr->regexps[i+1] = tsdPtr->regexps[i];
963 }
964 tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1));
965 strcpy(tsdPtr->patterns[0], string);
966 tsdPtr->patLengths[0] = length;
967 tsdPtr->regexps[0] = regexpPtr;
968
969 return regexpPtr;
970 }
971
972 /*
973 *----------------------------------------------------------------------
974 *
975 * FreeRegexp --
976 *
977 * Release the storage associated with a TclRegexp.
978 *
979 * Results:
980 * None.
981 *
982 * Side effects:
983 * None.
984 *
985 *----------------------------------------------------------------------
986 */
987
988 static void
989 FreeRegexp(regexpPtr)
990 TclRegexp *regexpPtr; /* Compiled regular expression to free. */
991 {
992 TclReFree(&regexpPtr->re);
993 if (regexpPtr->matches) {
994 ckfree((char *) regexpPtr->matches);
995 }
996 ckfree((char *) regexpPtr);
997 }
998
999 /*
1000 *----------------------------------------------------------------------
1001 *
1002 * FinalizeRegexp --
1003 *
1004 * Release the storage associated with the per-thread regexp
1005 * cache.
1006 *
1007 * Results:
1008 * None.
1009 *
1010 * Side effects:
1011 * None.
1012 *
1013 *----------------------------------------------------------------------
1014 */
1015
1016 static void
1017 FinalizeRegexp(clientData)
1018 ClientData clientData; /* Not used. */
1019 {
1020 int i;
1021 TclRegexp *regexpPtr;
1022 ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
1023
1024 for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
1025 regexpPtr = tsdPtr->regexps[i];
1026 if (--(regexpPtr->refCount) <= 0) {
1027 FreeRegexp(regexpPtr);
1028 }
1029 ckfree(tsdPtr->patterns[i]);
1030 }
1031 }
1032
1033
1034 /* $History: tclregexp.c $
1035 *
1036 * ***************** Version 1 *****************
1037 * User: Dtashley Date: 1/02/01 Time: 1:04a
1038 * Created in $/IjuScripter, IjuConsole/Source/Tcl Base
1039 * Initial check-in.
1040 */
1041
1042 /* End of TCLREGEXP.C */

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25