/[dtapublic]/projs/trunk/shared_source/c_tcl_base_7_5_w_mods/tclregexp.c
ViewVC logotype

Annotation of /projs/trunk/shared_source/c_tcl_base_7_5_w_mods/tclregexp.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 42 - (hide annotations) (download)
Fri Oct 14 01:50:00 2016 UTC (8 years ago) by dashley
Original Path: projs/trunk/shared_source/tcl_base/tclregexp.c
File MIME type: text/plain
File size: 31427 byte(s)
Move shared source code to commonize.
1 dashley 25 /* $Header: /cvsroot/esrg/sfesrg/esrgpcpj/shared/tcl_base/tclregexp.c,v 1.1.1.1 2001/06/13 04:45:39 dtashley Exp $ */
2    
3     /*
4     * tclRegexp.c --
5     *
6     * This file contains the public interfaces to the Tcl regular
7     * expression mechanism.
8     *
9     * Copyright (c) 1998 by Sun Microsystems, Inc.
10     * Copyright (c) 1998-1999 by Scriptics Corporation.
11     *
12     * See the file "license.terms" for information on usage and redistribution
13     * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
14     *
15     * RCS: @(#) $Id: tclregexp.c,v 1.1.1.1 2001/06/13 04:45:39 dtashley Exp $
16     */
17    
18     #include "tclInt.h"
19     #include "tclPort.h"
20     #include "tclRegexp.h"
21    
22     /*
23     *----------------------------------------------------------------------
24     * The routines in this file use Henry Spencer's regular expression
25     * package contained in the following additional source files:
26     *
27     * regc_color.c regc_cvec.c regc_lex.c
28     * regc_nfa.c regcomp.c regcustom.h
29     * rege_dfa.c regerror.c regerrs.h
30     * regex.h regexec.c regfree.c
31     * regfronts.c regguts.h
32     *
33     * Copyright (c) 1998 Henry Spencer. All rights reserved.
34     *
35     * Development of this software was funded, in part, by Cray Research Inc.,
36     * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
37     * Corporation, none of whom are responsible for the results. The author
38     * thanks all of them.
39     *
40     * Redistribution and use in source and binary forms -- with or without
41     * modification -- are permitted for any purpose, provided that
42     * redistributions in source form retain this entire copyright notice and
43     * indicate the origin and nature of any modifications.
44     *
45     * I'd appreciate being given credit for this package in the documentation
46     * of software which uses it, but that is not a requirement.
47     *
48     * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
49     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
50     * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
51     * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
52     * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
53     * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
54     * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
55     * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
56     * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
57     * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
58     *
59     * *** NOTE: this code has been altered slightly for use in Tcl: ***
60     * *** 1. Names have been changed, e.g. from re_comp to ***
61     * *** TclRegComp, to avoid clashes with other ***
62     * *** regexp implementations used by applications. ***
63     */
64    
65     /*
66     * Thread local storage used to maintain a per-thread cache of compiled
67     * regular expressions.
68     */
69    
70     #define NUM_REGEXPS 30
71    
72     typedef struct ThreadSpecificData {
73     int initialized; /* Set to 1 when the module is initialized. */
74     char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled
75     * regular expression patterns. NULL
76     * means that this slot isn't used.
77     * Malloc-ed. */
78     int patLengths[NUM_REGEXPS];/* Number of non-null characters in
79     * corresponding entry in patterns.
80     * -1 means entry isn't used. */
81     struct TclRegexp *regexps[NUM_REGEXPS];
82     /* Compiled forms of above strings. Also
83     * malloc-ed, or NULL if not in use yet. */
84     } ThreadSpecificData;
85    
86     static Tcl_ThreadDataKey dataKey;
87    
88     /*
89     * Declarations for functions used only in this file.
90     */
91    
92     static TclRegexp * CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp,
93     char *pattern, int length, int flags));
94     static void DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
95     Tcl_Obj *copyPtr));
96     static void FinalizeRegexp _ANSI_ARGS_((ClientData clientData));
97     static void FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr));
98     static void FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
99     static int RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
100     Tcl_RegExp re, CONST Tcl_UniChar *uniString,
101     int numChars, int nmatches, int flags));
102     static int SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp,
103     Tcl_Obj *objPtr));
104    
105     /*
106     * The regular expression Tcl object type. This serves as a cache
107     * of the compiled form of the regular expression.
108     */
109    
110     Tcl_ObjType tclRegexpType = {
111     "regexp", /* name */
112     FreeRegexpInternalRep, /* freeIntRepProc */
113     DupRegexpInternalRep, /* dupIntRepProc */
114     NULL, /* updateStringProc */
115     SetRegexpFromAny /* setFromAnyProc */
116     };
117    
118    
119     /*
120     *----------------------------------------------------------------------
121     *
122     * Tcl_RegExpCompile --
123     *
124     * Compile a regular expression into a form suitable for fast
125     * matching. This procedure is DEPRECATED in favor of the
126     * object version of the command.
127     *
128     * Results:
129     * The return value is a pointer to the compiled form of string,
130     * suitable for passing to Tcl_RegExpExec. This compiled form
131     * is only valid up until the next call to this procedure, so
132     * don't keep these around for a long time! If an error occurred
133     * while compiling the pattern, then NULL is returned and an error
134     * message is left in the interp's result.
135     *
136     * Side effects:
137     * Updates the cache of compiled regexps.
138     *
139     *----------------------------------------------------------------------
140     */
141    
142     Tcl_RegExp
143     Tcl_RegExpCompile(interp, string)
144     Tcl_Interp *interp; /* For use in error reporting and
145     * to access the interp regexp cache. */
146     char *string; /* String for which to produce
147     * compiled regular expression. */
148     {
149     return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string),
150     REG_ADVANCED);
151     }
152    
153     /*
154     *----------------------------------------------------------------------
155     *
156     * Tcl_RegExpExec --
157     *
158     * Execute the regular expression matcher using a compiled form
159     * of a regular expression and save information about any match
160     * that is found.
161     *
162     * Results:
163     * If an error occurs during the matching operation then -1
164     * is returned and the interp's result contains an error message.
165     * Otherwise the return value is 1 if a matching range is
166     * found and 0 if there is no matching range.
167     *
168     * Side effects:
169     * None.
170     *
171     *----------------------------------------------------------------------
172     */
173    
174     int
175     Tcl_RegExpExec(interp, re, string, start)
176     Tcl_Interp *interp; /* Interpreter to use for error reporting. */
177     Tcl_RegExp re; /* Compiled regular expression; must have
178     * been returned by previous call to
179     * Tcl_GetRegExpFromObj. */
180     CONST char *string; /* String against which to match re. */
181     CONST char *start; /* If string is part of a larger string,
182     * this identifies beginning of larger
183     * string, so that "^" won't match. */
184     {
185     int flags, result, numChars;
186     TclRegexp *regexp = (TclRegexp *)re;
187     Tcl_DString ds;
188     Tcl_UniChar *ustr;
189    
190     /*
191     * If the starting point is offset from the beginning of the buffer,
192     * then we need to tell the regexp engine not to match "^".
193     */
194    
195     if (string > start) {
196     flags = REG_NOTBOL;
197     } else {
198     flags = 0;
199     }
200    
201     /*
202     * Remember the string for use by Tcl_RegExpRange().
203     */
204    
205     regexp->string = string;
206     regexp->objPtr = NULL;
207    
208     /*
209     * Convert the string to Unicode and perform the match.
210     */
211    
212     Tcl_DStringInit(&ds);
213     ustr = Tcl_UtfToUniCharDString(string, -1, &ds);
214     numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar);
215     result = RegExpExecUniChar(interp, re, ustr, numChars,
216     -1 /* nmatches */, flags);
217     Tcl_DStringFree(&ds);
218    
219     return result;
220     }
221    
222     /*
223     *---------------------------------------------------------------------------
224     *
225     * Tcl_RegExpRange --
226     *
227     * Returns pointers describing the range of a regular expression match,
228     * or one of the subranges within the match.
229     *
230     * Results:
231     * The variables at *startPtr and *endPtr are modified to hold the
232     * addresses of the endpoints of the range given by index. If the
233     * specified range doesn't exist then NULLs are returned.
234     *
235     * Side effects:
236     * None.
237     *
238     *---------------------------------------------------------------------------
239     */
240    
241     void
242     Tcl_RegExpRange(re, index, startPtr, endPtr)
243     Tcl_RegExp re; /* Compiled regular expression that has
244     * been passed to Tcl_RegExpExec. */
245     int index; /* 0 means give the range of the entire
246     * match, > 0 means give the range of
247     * a matching subrange. */
248     char **startPtr; /* Store address of first character in
249     * (sub-) range here. */
250     char **endPtr; /* Store address of character just after last
251     * in (sub-) range here. */
252     {
253     TclRegexp *regexpPtr = (TclRegexp *) re;
254     CONST char *string;
255    
256     if ((size_t) index > regexpPtr->re.re_nsub) {
257     *startPtr = *endPtr = NULL;
258     } else if (regexpPtr->matches[index].rm_so < 0) {
259     *startPtr = *endPtr = NULL;
260     } else {
261     if (regexpPtr->objPtr) {
262     string = Tcl_GetString(regexpPtr->objPtr);
263     } else {
264     string = regexpPtr->string;
265     }
266     *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);
267     *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);
268     }
269     }
270    
271     /*
272     *---------------------------------------------------------------------------
273     *
274     * RegExpExecUniChar --
275     *
276     * Execute the regular expression matcher using a compiled form of a
277     * regular expression and save information about any match that is
278     * found.
279     *
280     * Results:
281     * If an error occurs during the matching operation then -1 is
282     * returned and an error message is left in interp's result.
283     * Otherwise the return value is 1 if a matching range was found or
284     * 0 if there was no matching range.
285     *
286     * Side effects:
287     * None.
288     *
289     *----------------------------------------------------------------------
290     */
291    
292     static int
293     RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
294     Tcl_Interp *interp; /* Interpreter to use for error reporting. */
295     Tcl_RegExp re; /* Compiled regular expression; returned by
296     * a previous call to Tcl_GetRegExpFromObj */
297     CONST Tcl_UniChar *wString; /* String against which to match re. */
298     int numChars; /* Length of Tcl_UniChar string (must
299     * be >= 0). */
300     int nmatches; /* How many subexpression matches (counting
301     * the whole match as subexpression 0) are
302     * of interest. -1 means "don't know". */
303     int flags; /* Regular expression flags. */
304     {
305     int status;
306     TclRegexp *regexpPtr = (TclRegexp *) re;
307     size_t last = regexpPtr->re.re_nsub + 1;
308     size_t nm = last;
309    
310     if (nmatches >= 0 && (size_t) nmatches < nm) {
311     nm = (size_t) nmatches;
312     }
313    
314     status = TclReExec(&regexpPtr->re, wString, (size_t) numChars,
315     &regexpPtr->details, nm, regexpPtr->matches, flags);
316    
317     /*
318     * Check for errors.
319     */
320    
321     if (status != REG_OKAY) {
322     if (status == REG_NOMATCH) {
323     return 0;
324     }
325     if (interp != NULL) {
326     TclRegError(interp, "error while matching regular expression: ",
327     status);
328     }
329     return -1;
330     }
331     return 1;
332     }
333    
334     /*
335     *---------------------------------------------------------------------------
336     *
337     * TclRegExpRangeUniChar --
338     *
339     * Returns pointers describing the range of a regular expression match,
340     * or one of the subranges within the match, or the hypothetical range
341     * represented by the rm_extend field of the rm_detail_t.
342     *
343     * Results:
344     * The variables at *startPtr and *endPtr are modified to hold the
345     * offsets of the endpoints of the range given by index. If the
346     * specified range doesn't exist then -1s are supplied.
347     *
348     * Side effects:
349     * None.
350     *
351     *---------------------------------------------------------------------------
352     */
353    
354     void
355     TclRegExpRangeUniChar(re, index, startPtr, endPtr)
356     Tcl_RegExp re; /* Compiled regular expression that has
357     * been passed to Tcl_RegExpExec. */
358     int index; /* 0 means give the range of the entire
359     * match, > 0 means give the range of
360     * a matching subrange, -1 means the
361     * range of the rm_extend field. */
362     int *startPtr; /* Store address of first character in
363     * (sub-) range here. */
364     int *endPtr; /* Store address of character just after last
365     * in (sub-) range here. */
366     {
367     TclRegexp *regexpPtr = (TclRegexp *) re;
368    
369     if ((regexpPtr->flags&REG_EXPECT) && index == -1) {
370     *startPtr = regexpPtr->details.rm_extend.rm_so;
371     *endPtr = regexpPtr->details.rm_extend.rm_eo;
372     } else if ((size_t) index > regexpPtr->re.re_nsub) {
373     *startPtr = -1;
374     *endPtr = -1;
375     } else {
376     *startPtr = regexpPtr->matches[index].rm_so;
377     *endPtr = regexpPtr->matches[index].rm_eo;
378     }
379     }
380    
381     /*
382     *----------------------------------------------------------------------
383     *
384     * Tcl_RegExpMatch --
385     *
386     * See if a string matches a regular expression.
387     *
388     * Results:
389     * If an error occurs during the matching operation then -1
390     * is returned and the interp's result contains an error message.
391     * Otherwise the return value is 1 if "string" matches "pattern"
392     * and 0 otherwise.
393     *
394     * Side effects:
395     * None.
396     *
397     *----------------------------------------------------------------------
398     */
399    
400     int
401     Tcl_RegExpMatch(interp, string, pattern)
402     Tcl_Interp *interp; /* Used for error reporting. May be NULL. */
403     char *string; /* String. */
404     char *pattern; /* Regular expression to match against
405     * string. */
406     {
407     Tcl_RegExp re;
408    
409     re = Tcl_RegExpCompile(interp, pattern);
410     if (re == NULL) {
411     return -1;
412     }
413     return Tcl_RegExpExec(interp, re, string, string);
414     }
415    
416     /*
417     *----------------------------------------------------------------------
418     *
419     * Tcl_RegExpExecObj --
420     *
421     * Execute a precompiled regexp against the given object.
422     *
423     * Results:
424     * If an error occurs during the matching operation then -1
425     * is returned and the interp's result contains an error message.
426     * Otherwise the return value is 1 if "string" matches "pattern"
427     * and 0 otherwise.
428     *
429     * Side effects:
430     * Converts the object to a Unicode object.
431     *
432     *----------------------------------------------------------------------
433     */
434    
435     int
436     Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags)
437     Tcl_Interp *interp; /* Interpreter to use for error reporting. */
438     Tcl_RegExp re; /* Compiled regular expression; must have
439     * been returned by previous call to
440     * Tcl_GetRegExpFromObj. */
441     Tcl_Obj *objPtr; /* String against which to match re. */
442     int offset; /* Character index that marks where matching
443     * should begin. */
444     int nmatches; /* How many subexpression matches (counting
445     * the whole match as subexpression 0) are
446     * of interest. -1 means all of them. */
447     int flags; /* Regular expression execution flags. */
448     {
449     TclRegexp *regexpPtr = (TclRegexp *) re;
450     Tcl_UniChar *udata;
451     int length;
452    
453     /*
454     * Save the target object so we can extract strings from it later.
455     */
456    
457     regexpPtr->string = NULL;
458     regexpPtr->objPtr = objPtr;
459    
460     udata = Tcl_GetUnicode(objPtr);
461     length = Tcl_GetCharLength(objPtr);
462    
463     if (offset > length) {
464     offset = length;
465     }
466     udata += offset;
467     length -= offset;
468    
469     return RegExpExecUniChar(interp, re, udata, length, nmatches, flags);
470     }
471    
472     /*
473     *----------------------------------------------------------------------
474     *
475     * Tcl_RegExpMatchObj --
476     *
477     * See if an object matches a regular expression.
478     *
479     * Results:
480     * If an error occurs during the matching operation then -1
481     * is returned and the interp's result contains an error message.
482     * Otherwise the return value is 1 if "string" matches "pattern"
483     * and 0 otherwise.
484     *
485     * Side effects:
486     * Changes the internal rep of the pattern and string objects.
487     *
488     *----------------------------------------------------------------------
489     */
490    
491     int
492     Tcl_RegExpMatchObj(interp, stringObj, patternObj)
493     Tcl_Interp *interp; /* Used for error reporting. May be NULL. */
494     Tcl_Obj *stringObj; /* Object containing the String to search. */
495     Tcl_Obj *patternObj; /* Regular expression to match against
496     * string. */
497     {
498     Tcl_RegExp re;
499    
500     re = Tcl_GetRegExpFromObj(interp, patternObj,
501     TCL_REG_ADVANCED | TCL_REG_NOSUB);
502     if (re == NULL) {
503     return -1;
504     }
505     return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */,
506     0 /* nmatches */, 0 /* flags */);
507     }
508    
509     /*
510     *----------------------------------------------------------------------
511     *
512     * Tcl_RegExpGetInfo --
513     *
514     * Retrieve information about the current match.
515     *
516     * Results:
517     * None.
518     *
519     * Side effects:
520     * None.
521     *
522     *----------------------------------------------------------------------
523     */
524    
525     void
526     Tcl_RegExpGetInfo(regexp, infoPtr)
527     Tcl_RegExp regexp; /* Pattern from which to get subexpressions. */
528     Tcl_RegExpInfo *infoPtr; /* Match information is stored here. */
529     {
530     TclRegexp *regexpPtr = (TclRegexp *) regexp;
531    
532     infoPtr->nsubs = regexpPtr->re.re_nsub;
533     infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;
534     infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;
535     }
536    
537     /*
538     *----------------------------------------------------------------------
539     *
540     * Tcl_GetRegExpFromObj --
541     *
542     * Compile a regular expression into a form suitable for fast
543     * matching. This procedure caches the result in a Tcl_Obj.
544     *
545     * Results:
546     * The return value is a pointer to the compiled form of string,
547     * suitable for passing to Tcl_RegExpExec. If an error occurred
548     * while compiling the pattern, then NULL is returned and an error
549     * message is left in the interp's result.
550     *
551     * Side effects:
552     * Updates the native rep of the Tcl_Obj.
553     *
554     *----------------------------------------------------------------------
555     */
556    
557     Tcl_RegExp
558     Tcl_GetRegExpFromObj(interp, objPtr, flags)
559     Tcl_Interp *interp; /* For use in error reporting, and to access
560     * the interp regexp cache. */
561     Tcl_Obj *objPtr; /* Object whose string rep contains regular
562     * expression pattern. Internal rep will be
563     * changed to compiled form of this regular
564     * expression. */
565     int flags; /* Regular expression compilation flags. */
566     {
567     int length;
568     Tcl_ObjType *typePtr;
569     TclRegexp *regexpPtr;
570     char *pattern;
571    
572     typePtr = objPtr->typePtr;
573     regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
574    
575     if ((typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) {
576     pattern = Tcl_GetStringFromObj(objPtr, &length);
577    
578     regexpPtr = CompileRegexp(interp, pattern, length, flags);
579     if (regexpPtr == NULL) {
580     return NULL;
581     }
582    
583     /*
584     * Add a reference to the regexp so it will persist even if it is
585     * pushed out of the current thread's regexp cache. This reference
586     * will be removed when the object's internal rep is freed.
587     */
588    
589     regexpPtr->refCount++;
590    
591     /*
592     * Free the old representation and set our type.
593     */
594    
595     if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) {
596     (*typePtr->freeIntRepProc)(objPtr);
597     }
598     objPtr->internalRep.otherValuePtr = (VOID *) regexpPtr;
599     objPtr->typePtr = &tclRegexpType;
600     }
601     return (Tcl_RegExp) regexpPtr;
602     }
603    
604     /*
605     *----------------------------------------------------------------------
606     *
607     * TclRegAbout --
608     *
609     * Return information about a compiled regular expression.
610     *
611     * Results:
612     * The return value is -1 for failure, 0 for success, although at
613     * the moment there's nothing that could fail. On success, a list
614     * is left in the interp's result: first element is the subexpression
615     * count, second is a list of re_info bit names.
616     *
617     * Side effects:
618     * None.
619     *
620     *----------------------------------------------------------------------
621     */
622    
623     int
624     TclRegAbout(interp, re)
625     Tcl_Interp *interp; /* For use in variable assignment. */
626     Tcl_RegExp re; /* The compiled regular expression. */
627     {
628     TclRegexp *regexpPtr = (TclRegexp *)re;
629     char buf[TCL_INTEGER_SPACE];
630     static struct infoname {
631     int bit;
632     char *text;
633     } infonames[] = {
634     {REG_UBACKREF, "REG_UBACKREF"},
635     {REG_ULOOKAHEAD, "REG_ULOOKAHEAD"},
636     {REG_UBOUNDS, "REG_UBOUNDS"},
637     {REG_UBRACES, "REG_UBRACES"},
638     {REG_UBSALNUM, "REG_UBSALNUM"},
639     {REG_UPBOTCH, "REG_UPBOTCH"},
640     {REG_UBBS, "REG_UBBS"},
641     {REG_UNONPOSIX, "REG_UNONPOSIX"},
642     {REG_UUNSPEC, "REG_UUNSPEC"},
643     {REG_UUNPORT, "REG_UUNPORT"},
644     {REG_ULOCALE, "REG_ULOCALE"},
645     {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
646     {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
647     {REG_USHORTEST, "REG_USHORTEST"},
648     {0, ""}
649     };
650     struct infoname *inf;
651     int n;
652    
653     Tcl_ResetResult(interp);
654    
655     sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub));
656     Tcl_AppendElement(interp, buf);
657    
658     /*
659     * Must count bits before generating list, because we must know
660     * whether {} are needed before we start appending names.
661     */
662     n = 0;
663     for (inf = infonames; inf->bit != 0; inf++) {
664     if (regexpPtr->re.re_info&inf->bit) {
665     n++;
666     }
667     }
668     if (n != 1) {
669     Tcl_AppendResult(interp, " {", NULL);
670     }
671     for (inf = infonames; inf->bit != 0; inf++) {
672     if (regexpPtr->re.re_info&inf->bit) {
673     Tcl_AppendElement(interp, inf->text);
674     }
675     }
676     if (n != 1) {
677     Tcl_AppendResult(interp, "}", NULL);
678     }
679    
680     return 0;
681     }
682    
683     /*
684     *----------------------------------------------------------------------
685     *
686     * TclRegError --
687     *
688     * Generate an error message based on the regexp status code.
689     *
690     * Results:
691     * Places an error in the interpreter.
692     *
693     * Side effects:
694     * Sets errorCode as well.
695     *
696     *----------------------------------------------------------------------
697     */
698    
699     void
700     TclRegError(interp, msg, status)
701     Tcl_Interp *interp; /* Interpreter for error reporting. */
702     char *msg; /* Message to prepend to error. */
703     int status; /* Status code to report. */
704     {
705     char buf[100]; /* ample in practice */
706     char cbuf[100]; /* lots in practice */
707     size_t n;
708     char *p;
709    
710     Tcl_ResetResult(interp);
711     n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf));
712     p = (n > sizeof(buf)) ? "..." : "";
713     Tcl_AppendResult(interp, msg, buf, p, NULL);
714    
715     sprintf(cbuf, "%d", status);
716     (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
717     Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
718     }
719    
720    
721     /*
722     *----------------------------------------------------------------------
723     *
724     * FreeRegexpInternalRep --
725     *
726     * Deallocate the storage associated with a regexp object's internal
727     * representation.
728     *
729     * Results:
730     * None.
731     *
732     * Side effects:
733     * Frees the compiled regular expression.
734     *
735     *----------------------------------------------------------------------
736     */
737    
738     static void
739     FreeRegexpInternalRep(objPtr)
740     Tcl_Obj *objPtr; /* Regexp object with internal rep to free. */
741     {
742     TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
743    
744     /*
745     * If this is the last reference to the regexp, free it.
746     */
747    
748     if (--(regexpRepPtr->refCount) <= 0) {
749     FreeRegexp(regexpRepPtr);
750     }
751     }
752    
753     /*
754     *----------------------------------------------------------------------
755     *
756     * DupRegexpInternalRep --
757     *
758     * We copy the reference to the compiled regexp and bump its
759     * reference count.
760     *
761     * Results:
762     * None.
763     *
764     * Side effects:
765     * Increments the reference count of the regexp.
766     *
767     *----------------------------------------------------------------------
768     */
769    
770     static void
771     DupRegexpInternalRep(srcPtr, copyPtr)
772     Tcl_Obj *srcPtr; /* Object with internal rep to copy. */
773     Tcl_Obj *copyPtr; /* Object with internal rep to set. */
774     {
775     TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr;
776     regexpPtr->refCount++;
777     copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr;
778     copyPtr->typePtr = &tclRegexpType;
779     }
780    
781     /*
782     *----------------------------------------------------------------------
783     *
784     * SetRegexpFromAny --
785     *
786     * Attempt to generate a compiled regular expression for the Tcl object
787     * "objPtr".
788     *
789     * Results:
790     * The return value is TCL_OK or TCL_ERROR. If an error occurs during
791     * conversion, an error message is left in the interpreter's result
792     * unless "interp" is NULL.
793     *
794     * Side effects:
795     * If no error occurs, a regular expression is stored as "objPtr"s
796     * internal representation.
797     *
798     *----------------------------------------------------------------------
799     */
800    
801     static int
802     SetRegexpFromAny(interp, objPtr)
803     Tcl_Interp *interp; /* Used for error reporting if not NULL. */
804     Tcl_Obj *objPtr; /* The object to convert. */
805     {
806     if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) {
807     return TCL_ERROR;
808     }
809     return TCL_OK;
810     }
811    
812     /*
813     *---------------------------------------------------------------------------
814     *
815     * CompileRegexp --
816     *
817     * Attempt to compile the given regexp pattern. If the compiled
818     * regular expression can be found in the per-thread cache, it
819     * will be used instead of compiling a new copy.
820     *
821     * Results:
822     * The return value is a pointer to a newly allocated TclRegexp
823     * that represents the compiled pattern, or NULL if the pattern
824     * could not be compiled. If NULL is returned, an error message is
825     * left in the interp's result.
826     *
827     * Side effects:
828     * The thread-local regexp cache is updated and a new TclRegexp may
829     * be allocated.
830     *
831     *----------------------------------------------------------------------
832     */
833    
834     static TclRegexp *
835     CompileRegexp(interp, string, length, flags)
836     Tcl_Interp *interp; /* Used for error reporting if not NULL. */
837     char *string; /* The regexp to compile (UTF-8). */
838     int length; /* The length of the string in bytes. */
839     int flags; /* Compilation flags. */
840     {
841     TclRegexp *regexpPtr;
842     Tcl_UniChar *uniString;
843     int numChars;
844     Tcl_DString stringBuf;
845     int status, i;
846     ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
847    
848     if (!tsdPtr->initialized) {
849     tsdPtr->initialized = 1;
850     Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL);
851     }
852    
853     /*
854     * This routine maintains a second-level regular expression cache in
855     * addition to the per-object regexp cache. The per-thread cache is needed
856     * to handle the case where for various reasons the object is lost between
857     * invocations of the regexp command, but the literal pattern is the same.
858     */
859    
860     /*
861     * Check the per-thread compiled regexp cache. We can only reuse
862     * a regexp if it has the same pattern and the same flags.
863     */
864    
865     for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
866     if ((length == tsdPtr->patLengths[i])
867     && (tsdPtr->regexps[i]->flags == flags)
868     && (strcmp(string, tsdPtr->patterns[i]) == 0)) {
869     /*
870     * Move the matched pattern to the first slot in the
871     * cache and shift the other patterns down one position.
872     */
873    
874     if (i != 0) {
875     int j;
876     char *cachedString;
877    
878     cachedString = tsdPtr->patterns[i];
879     regexpPtr = tsdPtr->regexps[i];
880     for (j = i-1; j >= 0; j--) {
881     tsdPtr->patterns[j+1] = tsdPtr->patterns[j];
882     tsdPtr->patLengths[j+1] = tsdPtr->patLengths[j];
883     tsdPtr->regexps[j+1] = tsdPtr->regexps[j];
884     }
885     tsdPtr->patterns[0] = cachedString;
886     tsdPtr->patLengths[0] = length;
887     tsdPtr->regexps[0] = regexpPtr;
888     }
889     return tsdPtr->regexps[0];
890     }
891     }
892    
893     /*
894     * This is a new expression, so compile it and add it to the cache.
895     */
896    
897     regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
898     regexpPtr->objPtr = NULL;
899     regexpPtr->string = NULL;
900     regexpPtr->details.rm_extend.rm_so = -1;
901     regexpPtr->details.rm_extend.rm_eo = -1;
902    
903     /*
904     * Get the up-to-date string representation and map to unicode.
905     */
906    
907     Tcl_DStringInit(&stringBuf);
908     uniString = Tcl_UtfToUniCharDString(string, length, &stringBuf);
909     numChars = Tcl_DStringLength(&stringBuf) / sizeof(Tcl_UniChar);
910    
911     /*
912     * Compile the string and check for errors.
913     */
914    
915     regexpPtr->flags = flags;
916     status = TclReComp(&regexpPtr->re, uniString, (size_t) numChars, flags);
917     Tcl_DStringFree(&stringBuf);
918    
919     if (status != REG_OKAY) {
920     /*
921     * Clean up and report errors in the interpreter, if possible.
922     */
923    
924     ckfree((char *)regexpPtr);
925     if (interp) {
926     TclRegError(interp,
927     "couldn't compile regular expression pattern: ",
928     status);
929     }
930     return NULL;
931     }
932    
933     /*
934     * Allocate enough space for all of the subexpressions, plus one
935     * extra for the entire pattern.
936     */
937    
938     regexpPtr->matches = (regmatch_t *) ckalloc(
939     sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
940    
941     /*
942     * Initialize the refcount to one initially, since it is in the cache.
943     */
944    
945     regexpPtr->refCount = 1;
946    
947     /*
948     * Free the last regexp, if necessary, and make room at the head of the
949     * list for the new regexp.
950     */
951    
952     if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) {
953     TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1];
954     if (--(oldRegexpPtr->refCount) <= 0) {
955     FreeRegexp(oldRegexpPtr);
956     }
957     ckfree(tsdPtr->patterns[NUM_REGEXPS-1]);
958     }
959     for (i = NUM_REGEXPS - 2; i >= 0; i--) {
960     tsdPtr->patterns[i+1] = tsdPtr->patterns[i];
961     tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i];
962     tsdPtr->regexps[i+1] = tsdPtr->regexps[i];
963     }
964     tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1));
965     strcpy(tsdPtr->patterns[0], string);
966     tsdPtr->patLengths[0] = length;
967     tsdPtr->regexps[0] = regexpPtr;
968    
969     return regexpPtr;
970     }
971    
972     /*
973     *----------------------------------------------------------------------
974     *
975     * FreeRegexp --
976     *
977     * Release the storage associated with a TclRegexp.
978     *
979     * Results:
980     * None.
981     *
982     * Side effects:
983     * None.
984     *
985     *----------------------------------------------------------------------
986     */
987    
988     static void
989     FreeRegexp(regexpPtr)
990     TclRegexp *regexpPtr; /* Compiled regular expression to free. */
991     {
992     TclReFree(&regexpPtr->re);
993     if (regexpPtr->matches) {
994     ckfree((char *) regexpPtr->matches);
995     }
996     ckfree((char *) regexpPtr);
997     }
998    
999     /*
1000     *----------------------------------------------------------------------
1001     *
1002     * FinalizeRegexp --
1003     *
1004     * Release the storage associated with the per-thread regexp
1005     * cache.
1006     *
1007     * Results:
1008     * None.
1009     *
1010     * Side effects:
1011     * None.
1012     *
1013     *----------------------------------------------------------------------
1014     */
1015    
1016     static void
1017     FinalizeRegexp(clientData)
1018     ClientData clientData; /* Not used. */
1019     {
1020     int i;
1021     TclRegexp *regexpPtr;
1022     ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
1023    
1024     for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
1025     regexpPtr = tsdPtr->regexps[i];
1026     if (--(regexpPtr->refCount) <= 0) {
1027     FreeRegexp(regexpPtr);
1028     }
1029     ckfree(tsdPtr->patterns[i]);
1030     }
1031     }
1032    
1033    
1034     /* $History: tclregexp.c $
1035     *
1036     * ***************** Version 1 *****************
1037     * User: Dtashley Date: 1/02/01 Time: 1:04a
1038     * Created in $/IjuScripter, IjuConsole/Source/Tcl Base
1039     * Initial check-in.
1040     */
1041    
1042     /* End of TCLREGEXP.C */

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25