/[dtapublic]/projs/ets/trunk/src/c_tcl_base_7_5_w_mods/tclregexp.c
ViewVC logotype

Annotation of /projs/ets/trunk/src/c_tcl_base_7_5_w_mods/tclregexp.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 220 - (hide annotations) (download)
Sun Jul 22 15:58:07 2018 UTC (5 years, 11 months ago) by dashley
File MIME type: text/plain
File size: 30066 byte(s)
Reorganize.
1 dashley 71 /* $Header$ */
2     /*
3     * tclRegexp.c --
4     *
5     * This file contains the public interfaces to the Tcl regular
6     * expression mechanism.
7     *
8     * Copyright (c) 1998 by Sun Microsystems, Inc.
9     * Copyright (c) 1998-1999 by Scriptics Corporation.
10     *
11     * See the file "license.terms" for information on usage and redistribution
12     * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
13     *
14     * RCS: @(#) $Id: tclregexp.c,v 1.1.1.1 2001/06/13 04:45:39 dtashley Exp $
15     */
16    
17     #include "tclInt.h"
18     #include "tclPort.h"
19     #include "tclRegexp.h"
20    
21     /*
22     *----------------------------------------------------------------------
23     * The routines in this file use Henry Spencer's regular expression
24     * package contained in the following additional source files:
25     *
26     * regc_color.c regc_cvec.c regc_lex.c
27     * regc_nfa.c regcomp.c regcustom.h
28     * rege_dfa.c regerror.c regerrs.h
29     * regex.h regexec.c regfree.c
30     * regfronts.c regguts.h
31     *
32     * Copyright (c) 1998 Henry Spencer. All rights reserved.
33     *
34     * Development of this software was funded, in part, by Cray Research Inc.,
35     * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
36     * Corporation, none of whom are responsible for the results. The author
37     * thanks all of them.
38     *
39     * Redistribution and use in source and binary forms -- with or without
40     * modification -- are permitted for any purpose, provided that
41     * redistributions in source form retain this entire copyright notice and
42     * indicate the origin and nature of any modifications.
43     *
44     * I'd appreciate being given credit for this package in the documentation
45     * of software which uses it, but that is not a requirement.
46     *
47     * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
48     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
49     * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
50     * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
51     * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
52     * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
53     * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
54     * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
55     * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
56     * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
57     *
58     * *** NOTE: this code has been altered slightly for use in Tcl: ***
59     * *** 1. Names have been changed, e.g. from re_comp to ***
60     * *** TclRegComp, to avoid clashes with other ***
61     * *** regexp implementations used by applications. ***
62     */
63    
64     /*
65     * Thread local storage used to maintain a per-thread cache of compiled
66     * regular expressions.
67     */
68    
69     #define NUM_REGEXPS 30
70    
71     typedef struct ThreadSpecificData {
72     int initialized; /* Set to 1 when the module is initialized. */
73     char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled
74     * regular expression patterns. NULL
75     * means that this slot isn't used.
76     * Malloc-ed. */
77     int patLengths[NUM_REGEXPS];/* Number of non-null characters in
78     * corresponding entry in patterns.
79     * -1 means entry isn't used. */
80     struct TclRegexp *regexps[NUM_REGEXPS];
81     /* Compiled forms of above strings. Also
82     * malloc-ed, or NULL if not in use yet. */
83     } ThreadSpecificData;
84    
85     static Tcl_ThreadDataKey dataKey;
86    
87     /*
88     * Declarations for functions used only in this file.
89     */
90    
91     static TclRegexp * CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp,
92     char *pattern, int length, int flags));
93     static void DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
94     Tcl_Obj *copyPtr));
95     static void FinalizeRegexp _ANSI_ARGS_((ClientData clientData));
96     static void FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr));
97     static void FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
98     static int RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
99     Tcl_RegExp re, CONST Tcl_UniChar *uniString,
100     int numChars, int nmatches, int flags));
101     static int SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp,
102     Tcl_Obj *objPtr));
103    
104     /*
105     * The regular expression Tcl object type. This serves as a cache
106     * of the compiled form of the regular expression.
107     */
108    
109     Tcl_ObjType tclRegexpType = {
110     "regexp", /* name */
111     FreeRegexpInternalRep, /* freeIntRepProc */
112     DupRegexpInternalRep, /* dupIntRepProc */
113     NULL, /* updateStringProc */
114     SetRegexpFromAny /* setFromAnyProc */
115     };
116    
117    
118     /*
119     *----------------------------------------------------------------------
120     *
121     * Tcl_RegExpCompile --
122     *
123     * Compile a regular expression into a form suitable for fast
124     * matching. This procedure is DEPRECATED in favor of the
125     * object version of the command.
126     *
127     * Results:
128     * The return value is a pointer to the compiled form of string,
129     * suitable for passing to Tcl_RegExpExec. This compiled form
130     * is only valid up until the next call to this procedure, so
131     * don't keep these around for a long time! If an error occurred
132     * while compiling the pattern, then NULL is returned and an error
133     * message is left in the interp's result.
134     *
135     * Side effects:
136     * Updates the cache of compiled regexps.
137     *
138     *----------------------------------------------------------------------
139     */
140    
141     Tcl_RegExp
142     Tcl_RegExpCompile(interp, string)
143     Tcl_Interp *interp; /* For use in error reporting and
144     * to access the interp regexp cache. */
145     char *string; /* String for which to produce
146     * compiled regular expression. */
147     {
148     return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string),
149     REG_ADVANCED);
150     }
151    
152     /*
153     *----------------------------------------------------------------------
154     *
155     * Tcl_RegExpExec --
156     *
157     * Execute the regular expression matcher using a compiled form
158     * of a regular expression and save information about any match
159     * that is found.
160     *
161     * Results:
162     * If an error occurs during the matching operation then -1
163     * is returned and the interp's result contains an error message.
164     * Otherwise the return value is 1 if a matching range is
165     * found and 0 if there is no matching range.
166     *
167     * Side effects:
168     * None.
169     *
170     *----------------------------------------------------------------------
171     */
172    
173     int
174     Tcl_RegExpExec(interp, re, string, start)
175     Tcl_Interp *interp; /* Interpreter to use for error reporting. */
176     Tcl_RegExp re; /* Compiled regular expression; must have
177     * been returned by previous call to
178     * Tcl_GetRegExpFromObj. */
179     CONST char *string; /* String against which to match re. */
180     CONST char *start; /* If string is part of a larger string,
181     * this identifies beginning of larger
182     * string, so that "^" won't match. */
183     {
184     int flags, result, numChars;
185     TclRegexp *regexp = (TclRegexp *)re;
186     Tcl_DString ds;
187     Tcl_UniChar *ustr;
188    
189     /*
190     * If the starting point is offset from the beginning of the buffer,
191     * then we need to tell the regexp engine not to match "^".
192     */
193    
194     if (string > start) {
195     flags = REG_NOTBOL;
196     } else {
197     flags = 0;
198     }
199    
200     /*
201     * Remember the string for use by Tcl_RegExpRange().
202     */
203    
204     regexp->string = string;
205     regexp->objPtr = NULL;
206    
207     /*
208     * Convert the string to Unicode and perform the match.
209     */
210    
211     Tcl_DStringInit(&ds);
212     ustr = Tcl_UtfToUniCharDString(string, -1, &ds);
213     numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar);
214     result = RegExpExecUniChar(interp, re, ustr, numChars,
215     -1 /* nmatches */, flags);
216     Tcl_DStringFree(&ds);
217    
218     return result;
219     }
220    
221     /*
222     *---------------------------------------------------------------------------
223     *
224     * Tcl_RegExpRange --
225     *
226     * Returns pointers describing the range of a regular expression match,
227     * or one of the subranges within the match.
228     *
229     * Results:
230     * The variables at *startPtr and *endPtr are modified to hold the
231     * addresses of the endpoints of the range given by index. If the
232     * specified range doesn't exist then NULLs are returned.
233     *
234     * Side effects:
235     * None.
236     *
237     *---------------------------------------------------------------------------
238     */
239    
240     void
241     Tcl_RegExpRange(re, index, startPtr, endPtr)
242     Tcl_RegExp re; /* Compiled regular expression that has
243     * been passed to Tcl_RegExpExec. */
244     int index; /* 0 means give the range of the entire
245     * match, > 0 means give the range of
246     * a matching subrange. */
247     char **startPtr; /* Store address of first character in
248     * (sub-) range here. */
249     char **endPtr; /* Store address of character just after last
250     * in (sub-) range here. */
251     {
252     TclRegexp *regexpPtr = (TclRegexp *) re;
253     CONST char *string;
254    
255     if ((size_t) index > regexpPtr->re.re_nsub) {
256     *startPtr = *endPtr = NULL;
257     } else if (regexpPtr->matches[index].rm_so < 0) {
258     *startPtr = *endPtr = NULL;
259     } else {
260     if (regexpPtr->objPtr) {
261     string = Tcl_GetString(regexpPtr->objPtr);
262     } else {
263     string = regexpPtr->string;
264     }
265     *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);
266     *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);
267     }
268     }
269    
270     /*
271     *---------------------------------------------------------------------------
272     *
273     * RegExpExecUniChar --
274     *
275     * Execute the regular expression matcher using a compiled form of a
276     * regular expression and save information about any match that is
277     * found.
278     *
279     * Results:
280     * If an error occurs during the matching operation then -1 is
281     * returned and an error message is left in interp's result.
282     * Otherwise the return value is 1 if a matching range was found or
283     * 0 if there was no matching range.
284     *
285     * Side effects:
286     * None.
287     *
288     *----------------------------------------------------------------------
289     */
290    
291     static int
292     RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
293     Tcl_Interp *interp; /* Interpreter to use for error reporting. */
294     Tcl_RegExp re; /* Compiled regular expression; returned by
295     * a previous call to Tcl_GetRegExpFromObj */
296     CONST Tcl_UniChar *wString; /* String against which to match re. */
297     int numChars; /* Length of Tcl_UniChar string (must
298     * be >= 0). */
299     int nmatches; /* How many subexpression matches (counting
300     * the whole match as subexpression 0) are
301     * of interest. -1 means "don't know". */
302     int flags; /* Regular expression flags. */
303     {
304     int status;
305     TclRegexp *regexpPtr = (TclRegexp *) re;
306     size_t last = regexpPtr->re.re_nsub + 1;
307     size_t nm = last;
308    
309     if (nmatches >= 0 && (size_t) nmatches < nm) {
310     nm = (size_t) nmatches;
311     }
312    
313     status = TclReExec(&regexpPtr->re, wString, (size_t) numChars,
314     &regexpPtr->details, nm, regexpPtr->matches, flags);
315    
316     /*
317     * Check for errors.
318     */
319    
320     if (status != REG_OKAY) {
321     if (status == REG_NOMATCH) {
322     return 0;
323     }
324     if (interp != NULL) {
325     TclRegError(interp, "error while matching regular expression: ",
326     status);
327     }
328     return -1;
329     }
330     return 1;
331     }
332    
333     /*
334     *---------------------------------------------------------------------------
335     *
336     * TclRegExpRangeUniChar --
337     *
338     * Returns pointers describing the range of a regular expression match,
339     * or one of the subranges within the match, or the hypothetical range
340     * represented by the rm_extend field of the rm_detail_t.
341     *
342     * Results:
343     * The variables at *startPtr and *endPtr are modified to hold the
344     * offsets of the endpoints of the range given by index. If the
345     * specified range doesn't exist then -1s are supplied.
346     *
347     * Side effects:
348     * None.
349     *
350     *---------------------------------------------------------------------------
351     */
352    
353     void
354     TclRegExpRangeUniChar(re, index, startPtr, endPtr)
355     Tcl_RegExp re; /* Compiled regular expression that has
356     * been passed to Tcl_RegExpExec. */
357     int index; /* 0 means give the range of the entire
358     * match, > 0 means give the range of
359     * a matching subrange, -1 means the
360     * range of the rm_extend field. */
361     int *startPtr; /* Store address of first character in
362     * (sub-) range here. */
363     int *endPtr; /* Store address of character just after last
364     * in (sub-) range here. */
365     {
366     TclRegexp *regexpPtr = (TclRegexp *) re;
367    
368     if ((regexpPtr->flags&REG_EXPECT) && index == -1) {
369     *startPtr = regexpPtr->details.rm_extend.rm_so;
370     *endPtr = regexpPtr->details.rm_extend.rm_eo;
371     } else if ((size_t) index > regexpPtr->re.re_nsub) {
372     *startPtr = -1;
373     *endPtr = -1;
374     } else {
375     *startPtr = regexpPtr->matches[index].rm_so;
376     *endPtr = regexpPtr->matches[index].rm_eo;
377     }
378     }
379    
380     /*
381     *----------------------------------------------------------------------
382     *
383     * Tcl_RegExpMatch --
384     *
385     * See if a string matches a regular expression.
386     *
387     * Results:
388     * If an error occurs during the matching operation then -1
389     * is returned and the interp's result contains an error message.
390     * Otherwise the return value is 1 if "string" matches "pattern"
391     * and 0 otherwise.
392     *
393     * Side effects:
394     * None.
395     *
396     *----------------------------------------------------------------------
397     */
398    
399     int
400     Tcl_RegExpMatch(interp, string, pattern)
401     Tcl_Interp *interp; /* Used for error reporting. May be NULL. */
402     char *string; /* String. */
403     char *pattern; /* Regular expression to match against
404     * string. */
405     {
406     Tcl_RegExp re;
407    
408     re = Tcl_RegExpCompile(interp, pattern);
409     if (re == NULL) {
410     return -1;
411     }
412     return Tcl_RegExpExec(interp, re, string, string);
413     }
414    
415     /*
416     *----------------------------------------------------------------------
417     *
418     * Tcl_RegExpExecObj --
419     *
420     * Execute a precompiled regexp against the given object.
421     *
422     * Results:
423     * If an error occurs during the matching operation then -1
424     * is returned and the interp's result contains an error message.
425     * Otherwise the return value is 1 if "string" matches "pattern"
426     * and 0 otherwise.
427     *
428     * Side effects:
429     * Converts the object to a Unicode object.
430     *
431     *----------------------------------------------------------------------
432     */
433    
434     int
435     Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags)
436     Tcl_Interp *interp; /* Interpreter to use for error reporting. */
437     Tcl_RegExp re; /* Compiled regular expression; must have
438     * been returned by previous call to
439     * Tcl_GetRegExpFromObj. */
440     Tcl_Obj *objPtr; /* String against which to match re. */
441     int offset; /* Character index that marks where matching
442     * should begin. */
443     int nmatches; /* How many subexpression matches (counting
444     * the whole match as subexpression 0) are
445     * of interest. -1 means all of them. */
446     int flags; /* Regular expression execution flags. */
447     {
448     TclRegexp *regexpPtr = (TclRegexp *) re;
449     Tcl_UniChar *udata;
450     int length;
451    
452     /*
453     * Save the target object so we can extract strings from it later.
454     */
455    
456     regexpPtr->string = NULL;
457     regexpPtr->objPtr = objPtr;
458    
459     udata = Tcl_GetUnicode(objPtr);
460     length = Tcl_GetCharLength(objPtr);
461    
462     if (offset > length) {
463     offset = length;
464     }
465     udata += offset;
466     length -= offset;
467    
468     return RegExpExecUniChar(interp, re, udata, length, nmatches, flags);
469     }
470    
471     /*
472     *----------------------------------------------------------------------
473     *
474     * Tcl_RegExpMatchObj --
475     *
476     * See if an object matches a regular expression.
477     *
478     * Results:
479     * If an error occurs during the matching operation then -1
480     * is returned and the interp's result contains an error message.
481     * Otherwise the return value is 1 if "string" matches "pattern"
482     * and 0 otherwise.
483     *
484     * Side effects:
485     * Changes the internal rep of the pattern and string objects.
486     *
487     *----------------------------------------------------------------------
488     */
489    
490     int
491     Tcl_RegExpMatchObj(interp, stringObj, patternObj)
492     Tcl_Interp *interp; /* Used for error reporting. May be NULL. */
493     Tcl_Obj *stringObj; /* Object containing the String to search. */
494     Tcl_Obj *patternObj; /* Regular expression to match against
495     * string. */
496     {
497     Tcl_RegExp re;
498    
499     re = Tcl_GetRegExpFromObj(interp, patternObj,
500     TCL_REG_ADVANCED | TCL_REG_NOSUB);
501     if (re == NULL) {
502     return -1;
503     }
504     return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */,
505     0 /* nmatches */, 0 /* flags */);
506     }
507    
508     /*
509     *----------------------------------------------------------------------
510     *
511     * Tcl_RegExpGetInfo --
512     *
513     * Retrieve information about the current match.
514     *
515     * Results:
516     * None.
517     *
518     * Side effects:
519     * None.
520     *
521     *----------------------------------------------------------------------
522     */
523    
524     void
525     Tcl_RegExpGetInfo(regexp, infoPtr)
526     Tcl_RegExp regexp; /* Pattern from which to get subexpressions. */
527     Tcl_RegExpInfo *infoPtr; /* Match information is stored here. */
528     {
529     TclRegexp *regexpPtr = (TclRegexp *) regexp;
530    
531     infoPtr->nsubs = regexpPtr->re.re_nsub;
532     infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;
533     infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;
534     }
535    
536     /*
537     *----------------------------------------------------------------------
538     *
539     * Tcl_GetRegExpFromObj --
540     *
541     * Compile a regular expression into a form suitable for fast
542     * matching. This procedure caches the result in a Tcl_Obj.
543     *
544     * Results:
545     * The return value is a pointer to the compiled form of string,
546     * suitable for passing to Tcl_RegExpExec. If an error occurred
547     * while compiling the pattern, then NULL is returned and an error
548     * message is left in the interp's result.
549     *
550     * Side effects:
551     * Updates the native rep of the Tcl_Obj.
552     *
553     *----------------------------------------------------------------------
554     */
555    
556     Tcl_RegExp
557     Tcl_GetRegExpFromObj(interp, objPtr, flags)
558     Tcl_Interp *interp; /* For use in error reporting, and to access
559     * the interp regexp cache. */
560     Tcl_Obj *objPtr; /* Object whose string rep contains regular
561     * expression pattern. Internal rep will be
562     * changed to compiled form of this regular
563     * expression. */
564     int flags; /* Regular expression compilation flags. */
565     {
566     int length;
567     Tcl_ObjType *typePtr;
568     TclRegexp *regexpPtr;
569     char *pattern;
570    
571     typePtr = objPtr->typePtr;
572     regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
573    
574     if ((typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) {
575     pattern = Tcl_GetStringFromObj(objPtr, &length);
576    
577     regexpPtr = CompileRegexp(interp, pattern, length, flags);
578     if (regexpPtr == NULL) {
579     return NULL;
580     }
581    
582     /*
583     * Add a reference to the regexp so it will persist even if it is
584     * pushed out of the current thread's regexp cache. This reference
585     * will be removed when the object's internal rep is freed.
586     */
587    
588     regexpPtr->refCount++;
589    
590     /*
591     * Free the old representation and set our type.
592     */
593    
594     if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) {
595     (*typePtr->freeIntRepProc)(objPtr);
596     }
597     objPtr->internalRep.otherValuePtr = (VOID *) regexpPtr;
598     objPtr->typePtr = &tclRegexpType;
599     }
600     return (Tcl_RegExp) regexpPtr;
601     }
602    
603     /*
604     *----------------------------------------------------------------------
605     *
606     * TclRegAbout --
607     *
608     * Return information about a compiled regular expression.
609     *
610     * Results:
611     * The return value is -1 for failure, 0 for success, although at
612     * the moment there's nothing that could fail. On success, a list
613     * is left in the interp's result: first element is the subexpression
614     * count, second is a list of re_info bit names.
615     *
616     * Side effects:
617     * None.
618     *
619     *----------------------------------------------------------------------
620     */
621    
622     int
623     TclRegAbout(interp, re)
624     Tcl_Interp *interp; /* For use in variable assignment. */
625     Tcl_RegExp re; /* The compiled regular expression. */
626     {
627     TclRegexp *regexpPtr = (TclRegexp *)re;
628     char buf[TCL_INTEGER_SPACE];
629     static struct infoname {
630     int bit;
631     char *text;
632     } infonames[] = {
633     {REG_UBACKREF, "REG_UBACKREF"},
634     {REG_ULOOKAHEAD, "REG_ULOOKAHEAD"},
635     {REG_UBOUNDS, "REG_UBOUNDS"},
636     {REG_UBRACES, "REG_UBRACES"},
637     {REG_UBSALNUM, "REG_UBSALNUM"},
638     {REG_UPBOTCH, "REG_UPBOTCH"},
639     {REG_UBBS, "REG_UBBS"},
640     {REG_UNONPOSIX, "REG_UNONPOSIX"},
641     {REG_UUNSPEC, "REG_UUNSPEC"},
642     {REG_UUNPORT, "REG_UUNPORT"},
643     {REG_ULOCALE, "REG_ULOCALE"},
644     {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
645     {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
646     {REG_USHORTEST, "REG_USHORTEST"},
647     {0, ""}
648     };
649     struct infoname *inf;
650     int n;
651    
652     Tcl_ResetResult(interp);
653    
654     sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub));
655     Tcl_AppendElement(interp, buf);
656    
657     /*
658     * Must count bits before generating list, because we must know
659     * whether {} are needed before we start appending names.
660     */
661     n = 0;
662     for (inf = infonames; inf->bit != 0; inf++) {
663     if (regexpPtr->re.re_info&inf->bit) {
664     n++;
665     }
666     }
667     if (n != 1) {
668     Tcl_AppendResult(interp, " {", NULL);
669     }
670     for (inf = infonames; inf->bit != 0; inf++) {
671     if (regexpPtr->re.re_info&inf->bit) {
672     Tcl_AppendElement(interp, inf->text);
673     }
674     }
675     if (n != 1) {
676     Tcl_AppendResult(interp, "}", NULL);
677     }
678    
679     return 0;
680     }
681    
682     /*
683     *----------------------------------------------------------------------
684     *
685     * TclRegError --
686     *
687     * Generate an error message based on the regexp status code.
688     *
689     * Results:
690     * Places an error in the interpreter.
691     *
692     * Side effects:
693     * Sets errorCode as well.
694     *
695     *----------------------------------------------------------------------
696     */
697    
698     void
699     TclRegError(interp, msg, status)
700     Tcl_Interp *interp; /* Interpreter for error reporting. */
701     char *msg; /* Message to prepend to error. */
702     int status; /* Status code to report. */
703     {
704     char buf[100]; /* ample in practice */
705     char cbuf[100]; /* lots in practice */
706     size_t n;
707     char *p;
708    
709     Tcl_ResetResult(interp);
710     n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf));
711     p = (n > sizeof(buf)) ? "..." : "";
712     Tcl_AppendResult(interp, msg, buf, p, NULL);
713    
714     sprintf(cbuf, "%d", status);
715     (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
716     Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
717     }
718    
719    
720     /*
721     *----------------------------------------------------------------------
722     *
723     * FreeRegexpInternalRep --
724     *
725     * Deallocate the storage associated with a regexp object's internal
726     * representation.
727     *
728     * Results:
729     * None.
730     *
731     * Side effects:
732     * Frees the compiled regular expression.
733     *
734     *----------------------------------------------------------------------
735     */
736    
737     static void
738     FreeRegexpInternalRep(objPtr)
739     Tcl_Obj *objPtr; /* Regexp object with internal rep to free. */
740     {
741     TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
742    
743     /*
744     * If this is the last reference to the regexp, free it.
745     */
746    
747     if (--(regexpRepPtr->refCount) <= 0) {
748     FreeRegexp(regexpRepPtr);
749     }
750     }
751    
752     /*
753     *----------------------------------------------------------------------
754     *
755     * DupRegexpInternalRep --
756     *
757     * We copy the reference to the compiled regexp and bump its
758     * reference count.
759     *
760     * Results:
761     * None.
762     *
763     * Side effects:
764     * Increments the reference count of the regexp.
765     *
766     *----------------------------------------------------------------------
767     */
768    
769     static void
770     DupRegexpInternalRep(srcPtr, copyPtr)
771     Tcl_Obj *srcPtr; /* Object with internal rep to copy. */
772     Tcl_Obj *copyPtr; /* Object with internal rep to set. */
773     {
774     TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr;
775     regexpPtr->refCount++;
776     copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr;
777     copyPtr->typePtr = &tclRegexpType;
778     }
779    
780     /*
781     *----------------------------------------------------------------------
782     *
783     * SetRegexpFromAny --
784     *
785     * Attempt to generate a compiled regular expression for the Tcl object
786     * "objPtr".
787     *
788     * Results:
789     * The return value is TCL_OK or TCL_ERROR. If an error occurs during
790     * conversion, an error message is left in the interpreter's result
791     * unless "interp" is NULL.
792     *
793     * Side effects:
794     * If no error occurs, a regular expression is stored as "objPtr"s
795     * internal representation.
796     *
797     *----------------------------------------------------------------------
798     */
799    
800     static int
801     SetRegexpFromAny(interp, objPtr)
802     Tcl_Interp *interp; /* Used for error reporting if not NULL. */
803     Tcl_Obj *objPtr; /* The object to convert. */
804     {
805     if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) {
806     return TCL_ERROR;
807     }
808     return TCL_OK;
809     }
810    
811     /*
812     *---------------------------------------------------------------------------
813     *
814     * CompileRegexp --
815     *
816     * Attempt to compile the given regexp pattern. If the compiled
817     * regular expression can be found in the per-thread cache, it
818     * will be used instead of compiling a new copy.
819     *
820     * Results:
821     * The return value is a pointer to a newly allocated TclRegexp
822     * that represents the compiled pattern, or NULL if the pattern
823     * could not be compiled. If NULL is returned, an error message is
824     * left in the interp's result.
825     *
826     * Side effects:
827     * The thread-local regexp cache is updated and a new TclRegexp may
828     * be allocated.
829     *
830     *----------------------------------------------------------------------
831     */
832    
833     static TclRegexp *
834     CompileRegexp(interp, string, length, flags)
835     Tcl_Interp *interp; /* Used for error reporting if not NULL. */
836     char *string; /* The regexp to compile (UTF-8). */
837     int length; /* The length of the string in bytes. */
838     int flags; /* Compilation flags. */
839     {
840     TclRegexp *regexpPtr;
841     Tcl_UniChar *uniString;
842     int numChars;
843     Tcl_DString stringBuf;
844     int status, i;
845     ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
846    
847     if (!tsdPtr->initialized) {
848     tsdPtr->initialized = 1;
849     Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL);
850     }
851    
852     /*
853     * This routine maintains a second-level regular expression cache in
854     * addition to the per-object regexp cache. The per-thread cache is needed
855     * to handle the case where for various reasons the object is lost between
856     * invocations of the regexp command, but the literal pattern is the same.
857     */
858    
859     /*
860     * Check the per-thread compiled regexp cache. We can only reuse
861     * a regexp if it has the same pattern and the same flags.
862     */
863    
864     for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
865     if ((length == tsdPtr->patLengths[i])
866     && (tsdPtr->regexps[i]->flags == flags)
867     && (strcmp(string, tsdPtr->patterns[i]) == 0)) {
868     /*
869     * Move the matched pattern to the first slot in the
870     * cache and shift the other patterns down one position.
871     */
872    
873     if (i != 0) {
874     int j;
875     char *cachedString;
876    
877     cachedString = tsdPtr->patterns[i];
878     regexpPtr = tsdPtr->regexps[i];
879     for (j = i-1; j >= 0; j--) {
880     tsdPtr->patterns[j+1] = tsdPtr->patterns[j];
881     tsdPtr->patLengths[j+1] = tsdPtr->patLengths[j];
882     tsdPtr->regexps[j+1] = tsdPtr->regexps[j];
883     }
884     tsdPtr->patterns[0] = cachedString;
885     tsdPtr->patLengths[0] = length;
886     tsdPtr->regexps[0] = regexpPtr;
887     }
888     return tsdPtr->regexps[0];
889     }
890     }
891    
892     /*
893     * This is a new expression, so compile it and add it to the cache.
894     */
895    
896     regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
897     regexpPtr->objPtr = NULL;
898     regexpPtr->string = NULL;
899     regexpPtr->details.rm_extend.rm_so = -1;
900     regexpPtr->details.rm_extend.rm_eo = -1;
901    
902     /*
903     * Get the up-to-date string representation and map to unicode.
904     */
905    
906     Tcl_DStringInit(&stringBuf);
907     uniString = Tcl_UtfToUniCharDString(string, length, &stringBuf);
908     numChars = Tcl_DStringLength(&stringBuf) / sizeof(Tcl_UniChar);
909    
910     /*
911     * Compile the string and check for errors.
912     */
913    
914     regexpPtr->flags = flags;
915     status = TclReComp(&regexpPtr->re, uniString, (size_t) numChars, flags);
916     Tcl_DStringFree(&stringBuf);
917    
918     if (status != REG_OKAY) {
919     /*
920     * Clean up and report errors in the interpreter, if possible.
921     */
922    
923     ckfree((char *)regexpPtr);
924     if (interp) {
925     TclRegError(interp,
926     "couldn't compile regular expression pattern: ",
927     status);
928     }
929     return NULL;
930     }
931    
932     /*
933     * Allocate enough space for all of the subexpressions, plus one
934     * extra for the entire pattern.
935     */
936    
937     regexpPtr->matches = (regmatch_t *) ckalloc(
938     sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
939    
940     /*
941     * Initialize the refcount to one initially, since it is in the cache.
942     */
943    
944     regexpPtr->refCount = 1;
945    
946     /*
947     * Free the last regexp, if necessary, and make room at the head of the
948     * list for the new regexp.
949     */
950    
951     if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) {
952     TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1];
953     if (--(oldRegexpPtr->refCount) <= 0) {
954     FreeRegexp(oldRegexpPtr);
955     }
956     ckfree(tsdPtr->patterns[NUM_REGEXPS-1]);
957     }
958     for (i = NUM_REGEXPS - 2; i >= 0; i--) {
959     tsdPtr->patterns[i+1] = tsdPtr->patterns[i];
960     tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i];
961     tsdPtr->regexps[i+1] = tsdPtr->regexps[i];
962     }
963     tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1));
964     strcpy(tsdPtr->patterns[0], string);
965     tsdPtr->patLengths[0] = length;
966     tsdPtr->regexps[0] = regexpPtr;
967    
968     return regexpPtr;
969     }
970    
971     /*
972     *----------------------------------------------------------------------
973     *
974     * FreeRegexp --
975     *
976     * Release the storage associated with a TclRegexp.
977     *
978     * Results:
979     * None.
980     *
981     * Side effects:
982     * None.
983     *
984     *----------------------------------------------------------------------
985     */
986    
987     static void
988     FreeRegexp(regexpPtr)
989     TclRegexp *regexpPtr; /* Compiled regular expression to free. */
990     {
991     TclReFree(&regexpPtr->re);
992     if (regexpPtr->matches) {
993     ckfree((char *) regexpPtr->matches);
994     }
995     ckfree((char *) regexpPtr);
996     }
997    
998     /*
999     *----------------------------------------------------------------------
1000     *
1001     * FinalizeRegexp --
1002     *
1003     * Release the storage associated with the per-thread regexp
1004     * cache.
1005     *
1006     * Results:
1007     * None.
1008     *
1009     * Side effects:
1010     * None.
1011     *
1012     *----------------------------------------------------------------------
1013     */
1014    
1015     static void
1016     FinalizeRegexp(clientData)
1017     ClientData clientData; /* Not used. */
1018     {
1019     int i;
1020     TclRegexp *regexpPtr;
1021     ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
1022    
1023     for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
1024     regexpPtr = tsdPtr->regexps[i];
1025     if (--(regexpPtr->refCount) <= 0) {
1026     FreeRegexp(regexpPtr);
1027     }
1028     ckfree(tsdPtr->patterns[i]);
1029     }
1030     }
1031    
1032     /* End of tclregexp.c */

Properties

Name Value
svn:eol-style native
svn:keywords Header

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25