/[dtapublic]/projs/trunk/shared_source/c_tcl_base_7_5_w_mods/tclregexp.c
ViewVC logotype

Contents of /projs/trunk/shared_source/c_tcl_base_7_5_w_mods/tclregexp.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 71 - (show annotations) (download)
Sat Nov 5 11:07:06 2016 UTC (8 years ago) by dashley
File MIME type: text/plain
File size: 30066 byte(s)
Set EOL properties appropriately to facilitate simultaneous Linux and Windows development.
1 /* $Header$ */
2 /*
3 * tclRegexp.c --
4 *
5 * This file contains the public interfaces to the Tcl regular
6 * expression mechanism.
7 *
8 * Copyright (c) 1998 by Sun Microsystems, Inc.
9 * Copyright (c) 1998-1999 by Scriptics Corporation.
10 *
11 * See the file "license.terms" for information on usage and redistribution
12 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
13 *
14 * RCS: @(#) $Id: tclregexp.c,v 1.1.1.1 2001/06/13 04:45:39 dtashley Exp $
15 */
16
17 #include "tclInt.h"
18 #include "tclPort.h"
19 #include "tclRegexp.h"
20
21 /*
22 *----------------------------------------------------------------------
23 * The routines in this file use Henry Spencer's regular expression
24 * package contained in the following additional source files:
25 *
26 * regc_color.c regc_cvec.c regc_lex.c
27 * regc_nfa.c regcomp.c regcustom.h
28 * rege_dfa.c regerror.c regerrs.h
29 * regex.h regexec.c regfree.c
30 * regfronts.c regguts.h
31 *
32 * Copyright (c) 1998 Henry Spencer. All rights reserved.
33 *
34 * Development of this software was funded, in part, by Cray Research Inc.,
35 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
36 * Corporation, none of whom are responsible for the results. The author
37 * thanks all of them.
38 *
39 * Redistribution and use in source and binary forms -- with or without
40 * modification -- are permitted for any purpose, provided that
41 * redistributions in source form retain this entire copyright notice and
42 * indicate the origin and nature of any modifications.
43 *
44 * I'd appreciate being given credit for this package in the documentation
45 * of software which uses it, but that is not a requirement.
46 *
47 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
48 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
49 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
50 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
51 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
52 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
53 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
54 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
55 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
56 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
57 *
58 * *** NOTE: this code has been altered slightly for use in Tcl: ***
59 * *** 1. Names have been changed, e.g. from re_comp to ***
60 * *** TclRegComp, to avoid clashes with other ***
61 * *** regexp implementations used by applications. ***
62 */
63
64 /*
65 * Thread local storage used to maintain a per-thread cache of compiled
66 * regular expressions.
67 */
68
69 #define NUM_REGEXPS 30
70
71 typedef struct ThreadSpecificData {
72 int initialized; /* Set to 1 when the module is initialized. */
73 char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled
74 * regular expression patterns. NULL
75 * means that this slot isn't used.
76 * Malloc-ed. */
77 int patLengths[NUM_REGEXPS];/* Number of non-null characters in
78 * corresponding entry in patterns.
79 * -1 means entry isn't used. */
80 struct TclRegexp *regexps[NUM_REGEXPS];
81 /* Compiled forms of above strings. Also
82 * malloc-ed, or NULL if not in use yet. */
83 } ThreadSpecificData;
84
85 static Tcl_ThreadDataKey dataKey;
86
87 /*
88 * Declarations for functions used only in this file.
89 */
90
91 static TclRegexp * CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp,
92 char *pattern, int length, int flags));
93 static void DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
94 Tcl_Obj *copyPtr));
95 static void FinalizeRegexp _ANSI_ARGS_((ClientData clientData));
96 static void FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr));
97 static void FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
98 static int RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
99 Tcl_RegExp re, CONST Tcl_UniChar *uniString,
100 int numChars, int nmatches, int flags));
101 static int SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp,
102 Tcl_Obj *objPtr));
103
104 /*
105 * The regular expression Tcl object type. This serves as a cache
106 * of the compiled form of the regular expression.
107 */
108
109 Tcl_ObjType tclRegexpType = {
110 "regexp", /* name */
111 FreeRegexpInternalRep, /* freeIntRepProc */
112 DupRegexpInternalRep, /* dupIntRepProc */
113 NULL, /* updateStringProc */
114 SetRegexpFromAny /* setFromAnyProc */
115 };
116
117
118 /*
119 *----------------------------------------------------------------------
120 *
121 * Tcl_RegExpCompile --
122 *
123 * Compile a regular expression into a form suitable for fast
124 * matching. This procedure is DEPRECATED in favor of the
125 * object version of the command.
126 *
127 * Results:
128 * The return value is a pointer to the compiled form of string,
129 * suitable for passing to Tcl_RegExpExec. This compiled form
130 * is only valid up until the next call to this procedure, so
131 * don't keep these around for a long time! If an error occurred
132 * while compiling the pattern, then NULL is returned and an error
133 * message is left in the interp's result.
134 *
135 * Side effects:
136 * Updates the cache of compiled regexps.
137 *
138 *----------------------------------------------------------------------
139 */
140
141 Tcl_RegExp
142 Tcl_RegExpCompile(interp, string)
143 Tcl_Interp *interp; /* For use in error reporting and
144 * to access the interp regexp cache. */
145 char *string; /* String for which to produce
146 * compiled regular expression. */
147 {
148 return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string),
149 REG_ADVANCED);
150 }
151
152 /*
153 *----------------------------------------------------------------------
154 *
155 * Tcl_RegExpExec --
156 *
157 * Execute the regular expression matcher using a compiled form
158 * of a regular expression and save information about any match
159 * that is found.
160 *
161 * Results:
162 * If an error occurs during the matching operation then -1
163 * is returned and the interp's result contains an error message.
164 * Otherwise the return value is 1 if a matching range is
165 * found and 0 if there is no matching range.
166 *
167 * Side effects:
168 * None.
169 *
170 *----------------------------------------------------------------------
171 */
172
173 int
174 Tcl_RegExpExec(interp, re, string, start)
175 Tcl_Interp *interp; /* Interpreter to use for error reporting. */
176 Tcl_RegExp re; /* Compiled regular expression; must have
177 * been returned by previous call to
178 * Tcl_GetRegExpFromObj. */
179 CONST char *string; /* String against which to match re. */
180 CONST char *start; /* If string is part of a larger string,
181 * this identifies beginning of larger
182 * string, so that "^" won't match. */
183 {
184 int flags, result, numChars;
185 TclRegexp *regexp = (TclRegexp *)re;
186 Tcl_DString ds;
187 Tcl_UniChar *ustr;
188
189 /*
190 * If the starting point is offset from the beginning of the buffer,
191 * then we need to tell the regexp engine not to match "^".
192 */
193
194 if (string > start) {
195 flags = REG_NOTBOL;
196 } else {
197 flags = 0;
198 }
199
200 /*
201 * Remember the string for use by Tcl_RegExpRange().
202 */
203
204 regexp->string = string;
205 regexp->objPtr = NULL;
206
207 /*
208 * Convert the string to Unicode and perform the match.
209 */
210
211 Tcl_DStringInit(&ds);
212 ustr = Tcl_UtfToUniCharDString(string, -1, &ds);
213 numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar);
214 result = RegExpExecUniChar(interp, re, ustr, numChars,
215 -1 /* nmatches */, flags);
216 Tcl_DStringFree(&ds);
217
218 return result;
219 }
220
221 /*
222 *---------------------------------------------------------------------------
223 *
224 * Tcl_RegExpRange --
225 *
226 * Returns pointers describing the range of a regular expression match,
227 * or one of the subranges within the match.
228 *
229 * Results:
230 * The variables at *startPtr and *endPtr are modified to hold the
231 * addresses of the endpoints of the range given by index. If the
232 * specified range doesn't exist then NULLs are returned.
233 *
234 * Side effects:
235 * None.
236 *
237 *---------------------------------------------------------------------------
238 */
239
240 void
241 Tcl_RegExpRange(re, index, startPtr, endPtr)
242 Tcl_RegExp re; /* Compiled regular expression that has
243 * been passed to Tcl_RegExpExec. */
244 int index; /* 0 means give the range of the entire
245 * match, > 0 means give the range of
246 * a matching subrange. */
247 char **startPtr; /* Store address of first character in
248 * (sub-) range here. */
249 char **endPtr; /* Store address of character just after last
250 * in (sub-) range here. */
251 {
252 TclRegexp *regexpPtr = (TclRegexp *) re;
253 CONST char *string;
254
255 if ((size_t) index > regexpPtr->re.re_nsub) {
256 *startPtr = *endPtr = NULL;
257 } else if (regexpPtr->matches[index].rm_so < 0) {
258 *startPtr = *endPtr = NULL;
259 } else {
260 if (regexpPtr->objPtr) {
261 string = Tcl_GetString(regexpPtr->objPtr);
262 } else {
263 string = regexpPtr->string;
264 }
265 *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);
266 *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);
267 }
268 }
269
270 /*
271 *---------------------------------------------------------------------------
272 *
273 * RegExpExecUniChar --
274 *
275 * Execute the regular expression matcher using a compiled form of a
276 * regular expression and save information about any match that is
277 * found.
278 *
279 * Results:
280 * If an error occurs during the matching operation then -1 is
281 * returned and an error message is left in interp's result.
282 * Otherwise the return value is 1 if a matching range was found or
283 * 0 if there was no matching range.
284 *
285 * Side effects:
286 * None.
287 *
288 *----------------------------------------------------------------------
289 */
290
291 static int
292 RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
293 Tcl_Interp *interp; /* Interpreter to use for error reporting. */
294 Tcl_RegExp re; /* Compiled regular expression; returned by
295 * a previous call to Tcl_GetRegExpFromObj */
296 CONST Tcl_UniChar *wString; /* String against which to match re. */
297 int numChars; /* Length of Tcl_UniChar string (must
298 * be >= 0). */
299 int nmatches; /* How many subexpression matches (counting
300 * the whole match as subexpression 0) are
301 * of interest. -1 means "don't know". */
302 int flags; /* Regular expression flags. */
303 {
304 int status;
305 TclRegexp *regexpPtr = (TclRegexp *) re;
306 size_t last = regexpPtr->re.re_nsub + 1;
307 size_t nm = last;
308
309 if (nmatches >= 0 && (size_t) nmatches < nm) {
310 nm = (size_t) nmatches;
311 }
312
313 status = TclReExec(&regexpPtr->re, wString, (size_t) numChars,
314 &regexpPtr->details, nm, regexpPtr->matches, flags);
315
316 /*
317 * Check for errors.
318 */
319
320 if (status != REG_OKAY) {
321 if (status == REG_NOMATCH) {
322 return 0;
323 }
324 if (interp != NULL) {
325 TclRegError(interp, "error while matching regular expression: ",
326 status);
327 }
328 return -1;
329 }
330 return 1;
331 }
332
333 /*
334 *---------------------------------------------------------------------------
335 *
336 * TclRegExpRangeUniChar --
337 *
338 * Returns pointers describing the range of a regular expression match,
339 * or one of the subranges within the match, or the hypothetical range
340 * represented by the rm_extend field of the rm_detail_t.
341 *
342 * Results:
343 * The variables at *startPtr and *endPtr are modified to hold the
344 * offsets of the endpoints of the range given by index. If the
345 * specified range doesn't exist then -1s are supplied.
346 *
347 * Side effects:
348 * None.
349 *
350 *---------------------------------------------------------------------------
351 */
352
353 void
354 TclRegExpRangeUniChar(re, index, startPtr, endPtr)
355 Tcl_RegExp re; /* Compiled regular expression that has
356 * been passed to Tcl_RegExpExec. */
357 int index; /* 0 means give the range of the entire
358 * match, > 0 means give the range of
359 * a matching subrange, -1 means the
360 * range of the rm_extend field. */
361 int *startPtr; /* Store address of first character in
362 * (sub-) range here. */
363 int *endPtr; /* Store address of character just after last
364 * in (sub-) range here. */
365 {
366 TclRegexp *regexpPtr = (TclRegexp *) re;
367
368 if ((regexpPtr->flags&REG_EXPECT) && index == -1) {
369 *startPtr = regexpPtr->details.rm_extend.rm_so;
370 *endPtr = regexpPtr->details.rm_extend.rm_eo;
371 } else if ((size_t) index > regexpPtr->re.re_nsub) {
372 *startPtr = -1;
373 *endPtr = -1;
374 } else {
375 *startPtr = regexpPtr->matches[index].rm_so;
376 *endPtr = regexpPtr->matches[index].rm_eo;
377 }
378 }
379
380 /*
381 *----------------------------------------------------------------------
382 *
383 * Tcl_RegExpMatch --
384 *
385 * See if a string matches a regular expression.
386 *
387 * Results:
388 * If an error occurs during the matching operation then -1
389 * is returned and the interp's result contains an error message.
390 * Otherwise the return value is 1 if "string" matches "pattern"
391 * and 0 otherwise.
392 *
393 * Side effects:
394 * None.
395 *
396 *----------------------------------------------------------------------
397 */
398
399 int
400 Tcl_RegExpMatch(interp, string, pattern)
401 Tcl_Interp *interp; /* Used for error reporting. May be NULL. */
402 char *string; /* String. */
403 char *pattern; /* Regular expression to match against
404 * string. */
405 {
406 Tcl_RegExp re;
407
408 re = Tcl_RegExpCompile(interp, pattern);
409 if (re == NULL) {
410 return -1;
411 }
412 return Tcl_RegExpExec(interp, re, string, string);
413 }
414
415 /*
416 *----------------------------------------------------------------------
417 *
418 * Tcl_RegExpExecObj --
419 *
420 * Execute a precompiled regexp against the given object.
421 *
422 * Results:
423 * If an error occurs during the matching operation then -1
424 * is returned and the interp's result contains an error message.
425 * Otherwise the return value is 1 if "string" matches "pattern"
426 * and 0 otherwise.
427 *
428 * Side effects:
429 * Converts the object to a Unicode object.
430 *
431 *----------------------------------------------------------------------
432 */
433
434 int
435 Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags)
436 Tcl_Interp *interp; /* Interpreter to use for error reporting. */
437 Tcl_RegExp re; /* Compiled regular expression; must have
438 * been returned by previous call to
439 * Tcl_GetRegExpFromObj. */
440 Tcl_Obj *objPtr; /* String against which to match re. */
441 int offset; /* Character index that marks where matching
442 * should begin. */
443 int nmatches; /* How many subexpression matches (counting
444 * the whole match as subexpression 0) are
445 * of interest. -1 means all of them. */
446 int flags; /* Regular expression execution flags. */
447 {
448 TclRegexp *regexpPtr = (TclRegexp *) re;
449 Tcl_UniChar *udata;
450 int length;
451
452 /*
453 * Save the target object so we can extract strings from it later.
454 */
455
456 regexpPtr->string = NULL;
457 regexpPtr->objPtr = objPtr;
458
459 udata = Tcl_GetUnicode(objPtr);
460 length = Tcl_GetCharLength(objPtr);
461
462 if (offset > length) {
463 offset = length;
464 }
465 udata += offset;
466 length -= offset;
467
468 return RegExpExecUniChar(interp, re, udata, length, nmatches, flags);
469 }
470
471 /*
472 *----------------------------------------------------------------------
473 *
474 * Tcl_RegExpMatchObj --
475 *
476 * See if an object matches a regular expression.
477 *
478 * Results:
479 * If an error occurs during the matching operation then -1
480 * is returned and the interp's result contains an error message.
481 * Otherwise the return value is 1 if "string" matches "pattern"
482 * and 0 otherwise.
483 *
484 * Side effects:
485 * Changes the internal rep of the pattern and string objects.
486 *
487 *----------------------------------------------------------------------
488 */
489
490 int
491 Tcl_RegExpMatchObj(interp, stringObj, patternObj)
492 Tcl_Interp *interp; /* Used for error reporting. May be NULL. */
493 Tcl_Obj *stringObj; /* Object containing the String to search. */
494 Tcl_Obj *patternObj; /* Regular expression to match against
495 * string. */
496 {
497 Tcl_RegExp re;
498
499 re = Tcl_GetRegExpFromObj(interp, patternObj,
500 TCL_REG_ADVANCED | TCL_REG_NOSUB);
501 if (re == NULL) {
502 return -1;
503 }
504 return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */,
505 0 /* nmatches */, 0 /* flags */);
506 }
507
508 /*
509 *----------------------------------------------------------------------
510 *
511 * Tcl_RegExpGetInfo --
512 *
513 * Retrieve information about the current match.
514 *
515 * Results:
516 * None.
517 *
518 * Side effects:
519 * None.
520 *
521 *----------------------------------------------------------------------
522 */
523
524 void
525 Tcl_RegExpGetInfo(regexp, infoPtr)
526 Tcl_RegExp regexp; /* Pattern from which to get subexpressions. */
527 Tcl_RegExpInfo *infoPtr; /* Match information is stored here. */
528 {
529 TclRegexp *regexpPtr = (TclRegexp *) regexp;
530
531 infoPtr->nsubs = regexpPtr->re.re_nsub;
532 infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;
533 infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;
534 }
535
536 /*
537 *----------------------------------------------------------------------
538 *
539 * Tcl_GetRegExpFromObj --
540 *
541 * Compile a regular expression into a form suitable for fast
542 * matching. This procedure caches the result in a Tcl_Obj.
543 *
544 * Results:
545 * The return value is a pointer to the compiled form of string,
546 * suitable for passing to Tcl_RegExpExec. If an error occurred
547 * while compiling the pattern, then NULL is returned and an error
548 * message is left in the interp's result.
549 *
550 * Side effects:
551 * Updates the native rep of the Tcl_Obj.
552 *
553 *----------------------------------------------------------------------
554 */
555
556 Tcl_RegExp
557 Tcl_GetRegExpFromObj(interp, objPtr, flags)
558 Tcl_Interp *interp; /* For use in error reporting, and to access
559 * the interp regexp cache. */
560 Tcl_Obj *objPtr; /* Object whose string rep contains regular
561 * expression pattern. Internal rep will be
562 * changed to compiled form of this regular
563 * expression. */
564 int flags; /* Regular expression compilation flags. */
565 {
566 int length;
567 Tcl_ObjType *typePtr;
568 TclRegexp *regexpPtr;
569 char *pattern;
570
571 typePtr = objPtr->typePtr;
572 regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
573
574 if ((typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) {
575 pattern = Tcl_GetStringFromObj(objPtr, &length);
576
577 regexpPtr = CompileRegexp(interp, pattern, length, flags);
578 if (regexpPtr == NULL) {
579 return NULL;
580 }
581
582 /*
583 * Add a reference to the regexp so it will persist even if it is
584 * pushed out of the current thread's regexp cache. This reference
585 * will be removed when the object's internal rep is freed.
586 */
587
588 regexpPtr->refCount++;
589
590 /*
591 * Free the old representation and set our type.
592 */
593
594 if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) {
595 (*typePtr->freeIntRepProc)(objPtr);
596 }
597 objPtr->internalRep.otherValuePtr = (VOID *) regexpPtr;
598 objPtr->typePtr = &tclRegexpType;
599 }
600 return (Tcl_RegExp) regexpPtr;
601 }
602
603 /*
604 *----------------------------------------------------------------------
605 *
606 * TclRegAbout --
607 *
608 * Return information about a compiled regular expression.
609 *
610 * Results:
611 * The return value is -1 for failure, 0 for success, although at
612 * the moment there's nothing that could fail. On success, a list
613 * is left in the interp's result: first element is the subexpression
614 * count, second is a list of re_info bit names.
615 *
616 * Side effects:
617 * None.
618 *
619 *----------------------------------------------------------------------
620 */
621
622 int
623 TclRegAbout(interp, re)
624 Tcl_Interp *interp; /* For use in variable assignment. */
625 Tcl_RegExp re; /* The compiled regular expression. */
626 {
627 TclRegexp *regexpPtr = (TclRegexp *)re;
628 char buf[TCL_INTEGER_SPACE];
629 static struct infoname {
630 int bit;
631 char *text;
632 } infonames[] = {
633 {REG_UBACKREF, "REG_UBACKREF"},
634 {REG_ULOOKAHEAD, "REG_ULOOKAHEAD"},
635 {REG_UBOUNDS, "REG_UBOUNDS"},
636 {REG_UBRACES, "REG_UBRACES"},
637 {REG_UBSALNUM, "REG_UBSALNUM"},
638 {REG_UPBOTCH, "REG_UPBOTCH"},
639 {REG_UBBS, "REG_UBBS"},
640 {REG_UNONPOSIX, "REG_UNONPOSIX"},
641 {REG_UUNSPEC, "REG_UUNSPEC"},
642 {REG_UUNPORT, "REG_UUNPORT"},
643 {REG_ULOCALE, "REG_ULOCALE"},
644 {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
645 {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
646 {REG_USHORTEST, "REG_USHORTEST"},
647 {0, ""}
648 };
649 struct infoname *inf;
650 int n;
651
652 Tcl_ResetResult(interp);
653
654 sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub));
655 Tcl_AppendElement(interp, buf);
656
657 /*
658 * Must count bits before generating list, because we must know
659 * whether {} are needed before we start appending names.
660 */
661 n = 0;
662 for (inf = infonames; inf->bit != 0; inf++) {
663 if (regexpPtr->re.re_info&inf->bit) {
664 n++;
665 }
666 }
667 if (n != 1) {
668 Tcl_AppendResult(interp, " {", NULL);
669 }
670 for (inf = infonames; inf->bit != 0; inf++) {
671 if (regexpPtr->re.re_info&inf->bit) {
672 Tcl_AppendElement(interp, inf->text);
673 }
674 }
675 if (n != 1) {
676 Tcl_AppendResult(interp, "}", NULL);
677 }
678
679 return 0;
680 }
681
682 /*
683 *----------------------------------------------------------------------
684 *
685 * TclRegError --
686 *
687 * Generate an error message based on the regexp status code.
688 *
689 * Results:
690 * Places an error in the interpreter.
691 *
692 * Side effects:
693 * Sets errorCode as well.
694 *
695 *----------------------------------------------------------------------
696 */
697
698 void
699 TclRegError(interp, msg, status)
700 Tcl_Interp *interp; /* Interpreter for error reporting. */
701 char *msg; /* Message to prepend to error. */
702 int status; /* Status code to report. */
703 {
704 char buf[100]; /* ample in practice */
705 char cbuf[100]; /* lots in practice */
706 size_t n;
707 char *p;
708
709 Tcl_ResetResult(interp);
710 n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf));
711 p = (n > sizeof(buf)) ? "..." : "";
712 Tcl_AppendResult(interp, msg, buf, p, NULL);
713
714 sprintf(cbuf, "%d", status);
715 (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
716 Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
717 }
718
719
720 /*
721 *----------------------------------------------------------------------
722 *
723 * FreeRegexpInternalRep --
724 *
725 * Deallocate the storage associated with a regexp object's internal
726 * representation.
727 *
728 * Results:
729 * None.
730 *
731 * Side effects:
732 * Frees the compiled regular expression.
733 *
734 *----------------------------------------------------------------------
735 */
736
737 static void
738 FreeRegexpInternalRep(objPtr)
739 Tcl_Obj *objPtr; /* Regexp object with internal rep to free. */
740 {
741 TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
742
743 /*
744 * If this is the last reference to the regexp, free it.
745 */
746
747 if (--(regexpRepPtr->refCount) <= 0) {
748 FreeRegexp(regexpRepPtr);
749 }
750 }
751
752 /*
753 *----------------------------------------------------------------------
754 *
755 * DupRegexpInternalRep --
756 *
757 * We copy the reference to the compiled regexp and bump its
758 * reference count.
759 *
760 * Results:
761 * None.
762 *
763 * Side effects:
764 * Increments the reference count of the regexp.
765 *
766 *----------------------------------------------------------------------
767 */
768
769 static void
770 DupRegexpInternalRep(srcPtr, copyPtr)
771 Tcl_Obj *srcPtr; /* Object with internal rep to copy. */
772 Tcl_Obj *copyPtr; /* Object with internal rep to set. */
773 {
774 TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr;
775 regexpPtr->refCount++;
776 copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr;
777 copyPtr->typePtr = &tclRegexpType;
778 }
779
780 /*
781 *----------------------------------------------------------------------
782 *
783 * SetRegexpFromAny --
784 *
785 * Attempt to generate a compiled regular expression for the Tcl object
786 * "objPtr".
787 *
788 * Results:
789 * The return value is TCL_OK or TCL_ERROR. If an error occurs during
790 * conversion, an error message is left in the interpreter's result
791 * unless "interp" is NULL.
792 *
793 * Side effects:
794 * If no error occurs, a regular expression is stored as "objPtr"s
795 * internal representation.
796 *
797 *----------------------------------------------------------------------
798 */
799
800 static int
801 SetRegexpFromAny(interp, objPtr)
802 Tcl_Interp *interp; /* Used for error reporting if not NULL. */
803 Tcl_Obj *objPtr; /* The object to convert. */
804 {
805 if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) {
806 return TCL_ERROR;
807 }
808 return TCL_OK;
809 }
810
811 /*
812 *---------------------------------------------------------------------------
813 *
814 * CompileRegexp --
815 *
816 * Attempt to compile the given regexp pattern. If the compiled
817 * regular expression can be found in the per-thread cache, it
818 * will be used instead of compiling a new copy.
819 *
820 * Results:
821 * The return value is a pointer to a newly allocated TclRegexp
822 * that represents the compiled pattern, or NULL if the pattern
823 * could not be compiled. If NULL is returned, an error message is
824 * left in the interp's result.
825 *
826 * Side effects:
827 * The thread-local regexp cache is updated and a new TclRegexp may
828 * be allocated.
829 *
830 *----------------------------------------------------------------------
831 */
832
833 static TclRegexp *
834 CompileRegexp(interp, string, length, flags)
835 Tcl_Interp *interp; /* Used for error reporting if not NULL. */
836 char *string; /* The regexp to compile (UTF-8). */
837 int length; /* The length of the string in bytes. */
838 int flags; /* Compilation flags. */
839 {
840 TclRegexp *regexpPtr;
841 Tcl_UniChar *uniString;
842 int numChars;
843 Tcl_DString stringBuf;
844 int status, i;
845 ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
846
847 if (!tsdPtr->initialized) {
848 tsdPtr->initialized = 1;
849 Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL);
850 }
851
852 /*
853 * This routine maintains a second-level regular expression cache in
854 * addition to the per-object regexp cache. The per-thread cache is needed
855 * to handle the case where for various reasons the object is lost between
856 * invocations of the regexp command, but the literal pattern is the same.
857 */
858
859 /*
860 * Check the per-thread compiled regexp cache. We can only reuse
861 * a regexp if it has the same pattern and the same flags.
862 */
863
864 for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
865 if ((length == tsdPtr->patLengths[i])
866 && (tsdPtr->regexps[i]->flags == flags)
867 && (strcmp(string, tsdPtr->patterns[i]) == 0)) {
868 /*
869 * Move the matched pattern to the first slot in the
870 * cache and shift the other patterns down one position.
871 */
872
873 if (i != 0) {
874 int j;
875 char *cachedString;
876
877 cachedString = tsdPtr->patterns[i];
878 regexpPtr = tsdPtr->regexps[i];
879 for (j = i-1; j >= 0; j--) {
880 tsdPtr->patterns[j+1] = tsdPtr->patterns[j];
881 tsdPtr->patLengths[j+1] = tsdPtr->patLengths[j];
882 tsdPtr->regexps[j+1] = tsdPtr->regexps[j];
883 }
884 tsdPtr->patterns[0] = cachedString;
885 tsdPtr->patLengths[0] = length;
886 tsdPtr->regexps[0] = regexpPtr;
887 }
888 return tsdPtr->regexps[0];
889 }
890 }
891
892 /*
893 * This is a new expression, so compile it and add it to the cache.
894 */
895
896 regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
897 regexpPtr->objPtr = NULL;
898 regexpPtr->string = NULL;
899 regexpPtr->details.rm_extend.rm_so = -1;
900 regexpPtr->details.rm_extend.rm_eo = -1;
901
902 /*
903 * Get the up-to-date string representation and map to unicode.
904 */
905
906 Tcl_DStringInit(&stringBuf);
907 uniString = Tcl_UtfToUniCharDString(string, length, &stringBuf);
908 numChars = Tcl_DStringLength(&stringBuf) / sizeof(Tcl_UniChar);
909
910 /*
911 * Compile the string and check for errors.
912 */
913
914 regexpPtr->flags = flags;
915 status = TclReComp(&regexpPtr->re, uniString, (size_t) numChars, flags);
916 Tcl_DStringFree(&stringBuf);
917
918 if (status != REG_OKAY) {
919 /*
920 * Clean up and report errors in the interpreter, if possible.
921 */
922
923 ckfree((char *)regexpPtr);
924 if (interp) {
925 TclRegError(interp,
926 "couldn't compile regular expression pattern: ",
927 status);
928 }
929 return NULL;
930 }
931
932 /*
933 * Allocate enough space for all of the subexpressions, plus one
934 * extra for the entire pattern.
935 */
936
937 regexpPtr->matches = (regmatch_t *) ckalloc(
938 sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
939
940 /*
941 * Initialize the refcount to one initially, since it is in the cache.
942 */
943
944 regexpPtr->refCount = 1;
945
946 /*
947 * Free the last regexp, if necessary, and make room at the head of the
948 * list for the new regexp.
949 */
950
951 if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) {
952 TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1];
953 if (--(oldRegexpPtr->refCount) <= 0) {
954 FreeRegexp(oldRegexpPtr);
955 }
956 ckfree(tsdPtr->patterns[NUM_REGEXPS-1]);
957 }
958 for (i = NUM_REGEXPS - 2; i >= 0; i--) {
959 tsdPtr->patterns[i+1] = tsdPtr->patterns[i];
960 tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i];
961 tsdPtr->regexps[i+1] = tsdPtr->regexps[i];
962 }
963 tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1));
964 strcpy(tsdPtr->patterns[0], string);
965 tsdPtr->patLengths[0] = length;
966 tsdPtr->regexps[0] = regexpPtr;
967
968 return regexpPtr;
969 }
970
971 /*
972 *----------------------------------------------------------------------
973 *
974 * FreeRegexp --
975 *
976 * Release the storage associated with a TclRegexp.
977 *
978 * Results:
979 * None.
980 *
981 * Side effects:
982 * None.
983 *
984 *----------------------------------------------------------------------
985 */
986
987 static void
988 FreeRegexp(regexpPtr)
989 TclRegexp *regexpPtr; /* Compiled regular expression to free. */
990 {
991 TclReFree(&regexpPtr->re);
992 if (regexpPtr->matches) {
993 ckfree((char *) regexpPtr->matches);
994 }
995 ckfree((char *) regexpPtr);
996 }
997
998 /*
999 *----------------------------------------------------------------------
1000 *
1001 * FinalizeRegexp --
1002 *
1003 * Release the storage associated with the per-thread regexp
1004 * cache.
1005 *
1006 * Results:
1007 * None.
1008 *
1009 * Side effects:
1010 * None.
1011 *
1012 *----------------------------------------------------------------------
1013 */
1014
1015 static void
1016 FinalizeRegexp(clientData)
1017 ClientData clientData; /* Not used. */
1018 {
1019 int i;
1020 TclRegexp *regexpPtr;
1021 ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
1022
1023 for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
1024 regexpPtr = tsdPtr->regexps[i];
1025 if (--(regexpPtr->refCount) <= 0) {
1026 FreeRegexp(regexpPtr);
1027 }
1028 ckfree(tsdPtr->patterns[i]);
1029 }
1030 }
1031
1032 /* End of tclregexp.c */

Properties

Name Value
svn:eol-style native
svn:keywords Header

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25