/[dtapublic]/projs/ets/trunk/src/c_tcl_base_7_5_w_mods/tclregexp.c
ViewVC logotype

Diff of /projs/ets/trunk/src/c_tcl_base_7_5_w_mods/tclregexp.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

to_be_filed/sf_code/esrgpcpj/shared/tcl_base/tclregexp.c revision 29 by dashley, Sat Oct 8 07:08:47 2016 UTC projs/trunk/shared_source/c_tcl_base_7_5_w_mods/tclregexp.c revision 71 by dashley, Sat Nov 5 11:07:06 2016 UTC
# Line 1  Line 1 
 /* $Header: /cvsroot/esrg/sfesrg/esrgpcpj/shared/tcl_base/tclregexp.c,v 1.1.1.1 2001/06/13 04:45:39 dtashley Exp $ */  
   
 /*  
  * tclRegexp.c --  
  *  
  *      This file contains the public interfaces to the Tcl regular  
  *      expression mechanism.  
  *  
  * Copyright (c) 1998 by Sun Microsystems, Inc.  
  * Copyright (c) 1998-1999 by Scriptics Corporation.  
  *  
  * See the file "license.terms" for information on usage and redistribution  
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.  
  *  
  * RCS: @(#) $Id: tclregexp.c,v 1.1.1.1 2001/06/13 04:45:39 dtashley Exp $  
  */  
   
 #include "tclInt.h"  
 #include "tclPort.h"  
 #include "tclRegexp.h"  
   
 /*  
  *----------------------------------------------------------------------  
  * The routines in this file use Henry Spencer's regular expression  
  * package contained in the following additional source files:  
  *  
  *      regc_color.c    regc_cvec.c     regc_lex.c  
  *      regc_nfa.c      regcomp.c       regcustom.h  
  *      rege_dfa.c      regerror.c      regerrs.h  
  *      regex.h         regexec.c       regfree.c  
  *      regfronts.c     regguts.h  
  *  
  * Copyright (c) 1998 Henry Spencer.  All rights reserved.  
  *  
  * Development of this software was funded, in part, by Cray Research Inc.,  
  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics  
  * Corporation, none of whom are responsible for the results.  The author  
  * thanks all of them.  
  *  
  * Redistribution and use in source and binary forms -- with or without  
  * modification -- are permitted for any purpose, provided that  
  * redistributions in source form retain this entire copyright notice and  
  * indicate the origin and nature of any modifications.  
  *  
  * I'd appreciate being given credit for this package in the documentation  
  * of software which uses it, but that is not a requirement.  
  *  
  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  
  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY  
  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL  
  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,  
  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,  
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;  
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,  
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR  
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF  
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
  *  
  * *** NOTE: this code has been altered slightly for use in Tcl: ***  
  * *** 1. Names have been changed, e.g. from re_comp to          ***  
  * ***    TclRegComp, to avoid clashes with other                ***  
  * ***    regexp implementations used by applications.           ***  
  */  
   
 /*  
  * Thread local storage used to maintain a per-thread cache of compiled  
  * regular expressions.  
  */  
   
 #define NUM_REGEXPS 30  
   
 typedef struct ThreadSpecificData {  
     int initialized;            /* Set to 1 when the module is initialized. */  
     char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled  
                                  * regular expression patterns.  NULL  
                                  * means that this slot isn't used.  
                                  * Malloc-ed. */  
     int patLengths[NUM_REGEXPS];/* Number of non-null characters in  
                                  * corresponding entry in patterns.  
                                  * -1 means entry isn't used. */  
     struct TclRegexp *regexps[NUM_REGEXPS];  
                                 /* Compiled forms of above strings.  Also  
                                  * malloc-ed, or NULL if not in use yet. */  
 } ThreadSpecificData;  
   
 static Tcl_ThreadDataKey dataKey;  
   
 /*  
  * Declarations for functions used only in this file.  
  */  
   
 static TclRegexp *      CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp,  
                             char *pattern, int length, int flags));  
 static void             DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,  
                             Tcl_Obj *copyPtr));  
 static void             FinalizeRegexp _ANSI_ARGS_((ClientData clientData));  
 static void             FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr));  
 static void             FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));  
 static int              RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,  
                             Tcl_RegExp re, CONST Tcl_UniChar *uniString,  
                             int numChars, int nmatches, int flags));  
 static int              SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp,  
                             Tcl_Obj *objPtr));  
   
 /*  
  * The regular expression Tcl object type.  This serves as a cache  
  * of the compiled form of the regular expression.  
  */  
   
 Tcl_ObjType tclRegexpType = {  
     "regexp",                           /* name */  
     FreeRegexpInternalRep,              /* freeIntRepProc */  
     DupRegexpInternalRep,               /* dupIntRepProc */  
     NULL,                               /* updateStringProc */  
     SetRegexpFromAny                    /* setFromAnyProc */  
 };  
   
   
 /*  
  *----------------------------------------------------------------------  
  *  
  * Tcl_RegExpCompile --  
  *  
  *      Compile a regular expression into a form suitable for fast  
  *      matching.  This procedure is DEPRECATED in favor of the  
  *      object version of the command.  
  *  
  * Results:  
  *      The return value is a pointer to the compiled form of string,  
  *      suitable for passing to Tcl_RegExpExec.  This compiled form  
  *      is only valid up until the next call to this procedure, so  
  *      don't keep these around for a long time!  If an error occurred  
  *      while compiling the pattern, then NULL is returned and an error  
  *      message is left in the interp's result.  
  *  
  * Side effects:  
  *      Updates the cache of compiled regexps.  
  *  
  *----------------------------------------------------------------------  
  */  
   
 Tcl_RegExp  
 Tcl_RegExpCompile(interp, string)  
     Tcl_Interp *interp;         /* For use in error reporting and  
                                  * to access the interp regexp cache. */  
     char *string;               /* String for which to produce  
                                  * compiled regular expression. */  
 {  
     return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string),  
             REG_ADVANCED);  
 }  
   
 /*  
  *----------------------------------------------------------------------  
  *  
  * Tcl_RegExpExec --  
  *  
  *      Execute the regular expression matcher using a compiled form  
  *      of a regular expression and save information about any match  
  *      that is found.  
  *  
  * Results:  
  *      If an error occurs during the matching operation then -1  
  *      is returned and the interp's result contains an error message.  
  *      Otherwise the return value is 1 if a matching range is  
  *      found and 0 if there is no matching range.  
  *  
  * Side effects:  
  *      None.  
  *  
  *----------------------------------------------------------------------  
  */  
   
 int  
 Tcl_RegExpExec(interp, re, string, start)  
     Tcl_Interp *interp;         /* Interpreter to use for error reporting. */  
     Tcl_RegExp re;              /* Compiled regular expression;  must have  
                                  * been returned by previous call to  
                                  * Tcl_GetRegExpFromObj. */  
     CONST char *string;         /* String against which to match re. */  
     CONST char *start;          /* If string is part of a larger string,  
                                  * this identifies beginning of larger  
                                  * string, so that "^" won't match. */  
 {  
     int flags, result, numChars;  
     TclRegexp *regexp = (TclRegexp *)re;  
     Tcl_DString ds;  
     Tcl_UniChar *ustr;  
   
     /*  
      * If the starting point is offset from the beginning of the buffer,  
      * then we need to tell the regexp engine not to match "^".  
      */  
   
     if (string > start) {  
         flags = REG_NOTBOL;  
     } else {  
         flags = 0;  
     }  
   
     /*  
      * Remember the string for use by Tcl_RegExpRange().  
      */  
   
     regexp->string = string;  
     regexp->objPtr = NULL;  
   
     /*  
      * Convert the string to Unicode and perform the match.  
      */  
   
     Tcl_DStringInit(&ds);  
     ustr = Tcl_UtfToUniCharDString(string, -1, &ds);  
     numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar);  
     result = RegExpExecUniChar(interp, re, ustr, numChars,  
             -1 /* nmatches */, flags);  
     Tcl_DStringFree(&ds);  
   
     return result;  
 }  
   
 /*  
  *---------------------------------------------------------------------------  
  *  
  * Tcl_RegExpRange --  
  *  
  *      Returns pointers describing the range of a regular expression match,  
  *      or one of the subranges within the match.  
  *  
  * Results:  
  *      The variables at *startPtr and *endPtr are modified to hold the  
  *      addresses of the endpoints of the range given by index.  If the  
  *      specified range doesn't exist then NULLs are returned.  
  *  
  * Side effects:  
  *      None.  
  *  
  *---------------------------------------------------------------------------  
  */  
   
 void  
 Tcl_RegExpRange(re, index, startPtr, endPtr)  
     Tcl_RegExp re;              /* Compiled regular expression that has  
                                  * been passed to Tcl_RegExpExec. */  
     int index;                  /* 0 means give the range of the entire  
                                  * match, > 0 means give the range of  
                                  * a matching subrange. */  
     char **startPtr;            /* Store address of first character in  
                                  * (sub-) range here. */  
     char **endPtr;              /* Store address of character just after last  
                                  * in (sub-) range here. */  
 {  
     TclRegexp *regexpPtr = (TclRegexp *) re;  
     CONST char *string;  
   
     if ((size_t) index > regexpPtr->re.re_nsub) {  
         *startPtr = *endPtr = NULL;  
     } else if (regexpPtr->matches[index].rm_so < 0) {  
         *startPtr = *endPtr = NULL;  
     } else {  
         if (regexpPtr->objPtr) {  
             string = Tcl_GetString(regexpPtr->objPtr);  
         } else {  
             string = regexpPtr->string;  
         }  
         *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);  
         *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);  
     }  
 }  
   
 /*  
  *---------------------------------------------------------------------------  
  *  
  * RegExpExecUniChar --  
  *  
  *      Execute the regular expression matcher using a compiled form of a  
  *      regular expression and save information about any match that is  
  *      found.  
  *  
  * Results:  
  *      If an error occurs during the matching operation then -1 is  
  *      returned and an error message is left in interp's result.  
  *      Otherwise the return value is 1 if a matching range was found or  
  *      0 if there was no matching range.  
  *  
  * Side effects:  
  *      None.  
  *  
  *----------------------------------------------------------------------  
  */  
   
 static int  
 RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)  
     Tcl_Interp *interp;         /* Interpreter to use for error reporting. */  
     Tcl_RegExp re;              /* Compiled regular expression; returned by  
                                  * a previous call to Tcl_GetRegExpFromObj */  
     CONST Tcl_UniChar *wString; /* String against which to match re. */  
     int numChars;               /* Length of Tcl_UniChar string (must  
                                  * be >= 0). */  
     int nmatches;               /* How many subexpression matches (counting  
                                  * the whole match as subexpression 0) are  
                                  * of interest.  -1 means "don't know". */  
     int flags;                  /* Regular expression flags. */  
 {  
     int status;  
     TclRegexp *regexpPtr = (TclRegexp *) re;  
     size_t last = regexpPtr->re.re_nsub + 1;  
     size_t nm = last;  
   
     if (nmatches >= 0 && (size_t) nmatches < nm) {  
         nm = (size_t) nmatches;  
     }  
   
     status = TclReExec(&regexpPtr->re, wString, (size_t) numChars,  
             &regexpPtr->details, nm, regexpPtr->matches, flags);  
   
     /*  
      * Check for errors.  
      */  
   
     if (status != REG_OKAY) {  
         if (status == REG_NOMATCH) {  
             return 0;  
         }  
         if (interp != NULL) {  
             TclRegError(interp, "error while matching regular expression: ",  
                     status);  
         }  
         return -1;  
     }  
     return 1;  
 }  
   
 /*  
  *---------------------------------------------------------------------------  
  *  
  * TclRegExpRangeUniChar --  
  *  
  *      Returns pointers describing the range of a regular expression match,  
  *      or one of the subranges within the match, or the hypothetical range  
  *      represented by the rm_extend field of the rm_detail_t.  
  *  
  * Results:  
  *      The variables at *startPtr and *endPtr are modified to hold the  
  *      offsets of the endpoints of the range given by index.  If the  
  *      specified range doesn't exist then -1s are supplied.  
  *  
  * Side effects:  
  *      None.  
  *  
  *---------------------------------------------------------------------------  
  */  
   
 void  
 TclRegExpRangeUniChar(re, index, startPtr, endPtr)  
     Tcl_RegExp re;              /* Compiled regular expression that has  
                                  * been passed to Tcl_RegExpExec. */  
     int index;                  /* 0 means give the range of the entire  
                                  * match, > 0 means give the range of  
                                  * a matching subrange, -1 means the  
                                  * range of the rm_extend field. */  
     int *startPtr;              /* Store address of first character in  
                                  * (sub-) range here. */  
     int *endPtr;                /* Store address of character just after last  
                                  * in (sub-) range here. */  
 {  
     TclRegexp *regexpPtr = (TclRegexp *) re;  
   
     if ((regexpPtr->flags&REG_EXPECT) && index == -1) {  
         *startPtr = regexpPtr->details.rm_extend.rm_so;  
         *endPtr = regexpPtr->details.rm_extend.rm_eo;  
     } else if ((size_t) index > regexpPtr->re.re_nsub) {  
         *startPtr = -1;  
         *endPtr = -1;  
     } else {  
         *startPtr = regexpPtr->matches[index].rm_so;  
         *endPtr = regexpPtr->matches[index].rm_eo;  
     }  
 }  
   
 /*  
  *----------------------------------------------------------------------  
  *  
  * Tcl_RegExpMatch --  
  *  
  *      See if a string matches a regular expression.  
  *  
  * Results:  
  *      If an error occurs during the matching operation then -1  
  *      is returned and the interp's result contains an error message.  
  *      Otherwise the return value is 1 if "string" matches "pattern"  
  *      and 0 otherwise.  
  *  
  * Side effects:  
  *      None.  
  *  
  *----------------------------------------------------------------------  
  */  
   
 int  
 Tcl_RegExpMatch(interp, string, pattern)  
     Tcl_Interp *interp;         /* Used for error reporting. May be NULL. */  
     char *string;               /* String. */  
     char *pattern;              /* Regular expression to match against  
                                  * string. */  
 {  
     Tcl_RegExp re;  
   
     re = Tcl_RegExpCompile(interp, pattern);  
     if (re == NULL) {  
         return -1;  
     }  
     return Tcl_RegExpExec(interp, re, string, string);  
 }  
   
 /*  
  *----------------------------------------------------------------------  
  *  
  * Tcl_RegExpExecObj --  
  *  
  *      Execute a precompiled regexp against the given object.  
  *  
  * Results:  
  *      If an error occurs during the matching operation then -1  
  *      is returned and the interp's result contains an error message.  
  *      Otherwise the return value is 1 if "string" matches "pattern"  
  *      and 0 otherwise.  
  *  
  * Side effects:  
  *      Converts the object to a Unicode object.  
  *  
  *----------------------------------------------------------------------  
  */  
   
 int  
 Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags)  
     Tcl_Interp *interp;         /* Interpreter to use for error reporting. */  
     Tcl_RegExp re;              /* Compiled regular expression;  must have  
                                  * been returned by previous call to  
                                  * Tcl_GetRegExpFromObj. */  
     Tcl_Obj *objPtr;            /* String against which to match re. */  
     int offset;                 /* Character index that marks where matching  
                                  * should begin. */  
     int nmatches;               /* How many subexpression matches (counting  
                                  * the whole match as subexpression 0) are  
                                  * of interest.  -1 means all of them. */  
     int flags;                  /* Regular expression execution flags. */  
 {  
     TclRegexp *regexpPtr = (TclRegexp *) re;  
     Tcl_UniChar *udata;  
     int length;  
   
     /*  
      * Save the target object so we can extract strings from it later.  
      */  
   
     regexpPtr->string = NULL;  
     regexpPtr->objPtr = objPtr;  
   
     udata = Tcl_GetUnicode(objPtr);  
     length = Tcl_GetCharLength(objPtr);  
   
     if (offset > length) {  
         offset = length;  
     }  
     udata += offset;  
     length -= offset;  
       
     return RegExpExecUniChar(interp, re, udata, length, nmatches, flags);  
 }  
   
 /*  
  *----------------------------------------------------------------------  
  *  
  * Tcl_RegExpMatchObj --  
  *  
  *      See if an object matches a regular expression.  
  *  
  * Results:  
  *      If an error occurs during the matching operation then -1  
  *      is returned and the interp's result contains an error message.  
  *      Otherwise the return value is 1 if "string" matches "pattern"  
  *      and 0 otherwise.  
  *  
  * Side effects:  
  *      Changes the internal rep of the pattern and string objects.  
  *  
  *----------------------------------------------------------------------  
  */  
   
 int  
 Tcl_RegExpMatchObj(interp, stringObj, patternObj)  
     Tcl_Interp *interp;         /* Used for error reporting. May be NULL. */  
     Tcl_Obj *stringObj;         /* Object containing the String to search. */  
     Tcl_Obj *patternObj;        /* Regular expression to match against  
                                  * string. */  
 {  
     Tcl_RegExp re;  
   
     re = Tcl_GetRegExpFromObj(interp, patternObj,  
             TCL_REG_ADVANCED | TCL_REG_NOSUB);  
     if (re == NULL) {  
         return -1;  
     }  
     return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */,  
             0 /* nmatches */, 0 /* flags */);  
 }  
   
 /*  
  *----------------------------------------------------------------------  
  *  
  * Tcl_RegExpGetInfo --  
  *  
  *      Retrieve information about the current match.  
  *  
  * Results:  
  *      None.  
  *  
  * Side effects:  
  *      None.  
  *  
  *----------------------------------------------------------------------  
  */  
   
 void  
 Tcl_RegExpGetInfo(regexp, infoPtr)  
     Tcl_RegExp regexp;          /* Pattern from which to get subexpressions. */  
     Tcl_RegExpInfo *infoPtr;    /* Match information is stored here.  */  
 {  
     TclRegexp *regexpPtr = (TclRegexp *) regexp;  
   
     infoPtr->nsubs = regexpPtr->re.re_nsub;  
     infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;  
     infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;  
 }  
   
 /*  
  *----------------------------------------------------------------------  
  *  
  * Tcl_GetRegExpFromObj --  
  *  
  *      Compile a regular expression into a form suitable for fast  
  *      matching.  This procedure caches the result in a Tcl_Obj.  
  *  
  * Results:  
  *      The return value is a pointer to the compiled form of string,  
  *      suitable for passing to Tcl_RegExpExec.  If an error occurred  
  *      while compiling the pattern, then NULL is returned and an error  
  *      message is left in the interp's result.  
  *  
  * Side effects:  
  *      Updates the native rep of the Tcl_Obj.  
  *  
  *----------------------------------------------------------------------  
  */  
   
 Tcl_RegExp  
 Tcl_GetRegExpFromObj(interp, objPtr, flags)  
     Tcl_Interp *interp;         /* For use in error reporting, and to access  
                                  * the interp regexp cache. */  
     Tcl_Obj *objPtr;            /* Object whose string rep contains regular  
                                  * expression pattern.  Internal rep will be  
                                  * changed to compiled form of this regular  
                                  * expression. */  
     int flags;                  /* Regular expression compilation flags. */  
 {  
     int length;  
     Tcl_ObjType *typePtr;  
     TclRegexp *regexpPtr;  
     char *pattern;  
   
     typePtr = objPtr->typePtr;  
     regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;  
   
     if ((typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) {  
         pattern = Tcl_GetStringFromObj(objPtr, &length);  
   
         regexpPtr = CompileRegexp(interp, pattern, length, flags);  
         if (regexpPtr == NULL) {  
             return NULL;  
         }  
   
         /*  
          * Add a reference to the regexp so it will persist even if it is  
          * pushed out of the current thread's regexp cache.  This reference  
          * will be removed when the object's internal rep is freed.  
          */  
   
         regexpPtr->refCount++;  
   
         /*  
          * Free the old representation and set our type.  
          */  
   
         if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) {  
             (*typePtr->freeIntRepProc)(objPtr);  
         }  
         objPtr->internalRep.otherValuePtr = (VOID *) regexpPtr;  
         objPtr->typePtr = &tclRegexpType;  
     }  
     return (Tcl_RegExp) regexpPtr;  
 }  
   
 /*  
  *----------------------------------------------------------------------  
  *  
  * TclRegAbout --  
  *  
  *      Return information about a compiled regular expression.  
  *  
  * Results:  
  *      The return value is -1 for failure, 0 for success, although at  
  *      the moment there's nothing that could fail.  On success, a list  
  *      is left in the interp's result:  first element is the subexpression  
  *      count, second is a list of re_info bit names.  
  *  
  * Side effects:  
  *      None.  
  *  
  *----------------------------------------------------------------------  
  */  
   
 int  
 TclRegAbout(interp, re)  
     Tcl_Interp *interp;         /* For use in variable assignment. */  
     Tcl_RegExp re;              /* The compiled regular expression. */  
 {  
     TclRegexp *regexpPtr = (TclRegexp *)re;  
     char buf[TCL_INTEGER_SPACE];  
     static struct infoname {  
         int bit;  
         char *text;  
     } infonames[] = {  
         {REG_UBACKREF,          "REG_UBACKREF"},  
         {REG_ULOOKAHEAD,        "REG_ULOOKAHEAD"},  
         {REG_UBOUNDS,           "REG_UBOUNDS"},  
         {REG_UBRACES,           "REG_UBRACES"},  
         {REG_UBSALNUM,          "REG_UBSALNUM"},  
         {REG_UPBOTCH,           "REG_UPBOTCH"},  
         {REG_UBBS,              "REG_UBBS"},  
         {REG_UNONPOSIX,         "REG_UNONPOSIX"},  
         {REG_UUNSPEC,           "REG_UUNSPEC"},  
         {REG_UUNPORT,           "REG_UUNPORT"},  
         {REG_ULOCALE,           "REG_ULOCALE"},  
         {REG_UEMPTYMATCH,       "REG_UEMPTYMATCH"},  
         {REG_UIMPOSSIBLE,       "REG_UIMPOSSIBLE"},  
         {REG_USHORTEST,         "REG_USHORTEST"},  
         {0,                     ""}  
     };  
     struct infoname *inf;  
     int n;  
   
     Tcl_ResetResult(interp);  
   
     sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub));  
     Tcl_AppendElement(interp, buf);  
   
     /*  
      * Must count bits before generating list, because we must know  
      * whether {} are needed before we start appending names.  
      */  
     n = 0;  
     for (inf = infonames; inf->bit != 0; inf++) {  
         if (regexpPtr->re.re_info&inf->bit) {  
             n++;  
         }  
     }  
     if (n != 1) {  
         Tcl_AppendResult(interp, " {", NULL);  
     }  
     for (inf = infonames; inf->bit != 0; inf++) {  
         if (regexpPtr->re.re_info&inf->bit) {  
             Tcl_AppendElement(interp, inf->text);  
         }  
     }  
     if (n != 1) {  
         Tcl_AppendResult(interp, "}", NULL);  
     }  
   
     return 0;  
 }  
   
 /*  
  *----------------------------------------------------------------------  
  *  
  * TclRegError --  
  *  
  *      Generate an error message based on the regexp status code.  
  *  
  * Results:  
  *      Places an error in the interpreter.  
  *  
  * Side effects:  
  *      Sets errorCode as well.  
  *  
  *----------------------------------------------------------------------  
  */  
   
 void  
 TclRegError(interp, msg, status)  
     Tcl_Interp *interp;         /* Interpreter for error reporting. */  
     char *msg;                  /* Message to prepend to error. */  
     int status;                 /* Status code to report. */  
 {  
     char buf[100];              /* ample in practice */  
     char cbuf[100];             /* lots in practice */  
     size_t n;  
     char *p;  
   
     Tcl_ResetResult(interp);  
     n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf));  
     p = (n > sizeof(buf)) ? "..." : "";  
     Tcl_AppendResult(interp, msg, buf, p, NULL);  
   
     sprintf(cbuf, "%d", status);  
     (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));  
     Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);  
 }  
   
   
 /*  
  *----------------------------------------------------------------------  
  *  
  * FreeRegexpInternalRep --  
  *  
  *      Deallocate the storage associated with a regexp object's internal  
  *      representation.  
  *  
  * Results:  
  *      None.  
  *  
  * Side effects:  
  *      Frees the compiled regular expression.  
  *  
  *----------------------------------------------------------------------  
  */  
   
 static void  
 FreeRegexpInternalRep(objPtr)  
     Tcl_Obj *objPtr;            /* Regexp object with internal rep to free. */  
 {  
     TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;  
   
     /*  
      * If this is the last reference to the regexp, free it.  
      */  
   
     if (--(regexpRepPtr->refCount) <= 0) {  
         FreeRegexp(regexpRepPtr);  
     }  
 }  
   
 /*  
  *----------------------------------------------------------------------  
  *  
  * DupRegexpInternalRep --  
  *  
  *      We copy the reference to the compiled regexp and bump its  
  *      reference count.  
  *  
  * Results:  
  *      None.  
  *  
  * Side effects:  
  *      Increments the reference count of the regexp.  
  *  
  *----------------------------------------------------------------------  
  */  
   
 static void  
 DupRegexpInternalRep(srcPtr, copyPtr)  
     Tcl_Obj *srcPtr;            /* Object with internal rep to copy. */  
     Tcl_Obj *copyPtr;           /* Object with internal rep to set. */  
 {  
     TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr;  
     regexpPtr->refCount++;  
     copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr;  
     copyPtr->typePtr = &tclRegexpType;  
 }  
   
 /*  
  *----------------------------------------------------------------------  
  *  
  * SetRegexpFromAny --  
  *  
  *      Attempt to generate a compiled regular expression for the Tcl object  
  *      "objPtr".  
  *  
  * Results:  
  *      The return value is TCL_OK or TCL_ERROR. If an error occurs during  
  *      conversion, an error message is left in the interpreter's result  
  *      unless "interp" is NULL.  
  *  
  * Side effects:  
  *      If no error occurs, a regular expression is stored as "objPtr"s  
  *      internal representation.  
  *  
  *----------------------------------------------------------------------  
  */  
   
 static int  
 SetRegexpFromAny(interp, objPtr)  
     Tcl_Interp *interp;         /* Used for error reporting if not NULL. */  
     Tcl_Obj *objPtr;            /* The object to convert. */  
 {  
     if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) {  
         return TCL_ERROR;  
     }  
     return TCL_OK;  
 }  
   
 /*  
  *---------------------------------------------------------------------------  
  *  
  * CompileRegexp --  
  *  
  *      Attempt to compile the given regexp pattern.  If the compiled  
  *      regular expression can be found in the per-thread cache, it  
  *      will be used instead of compiling a new copy.  
  *  
  * Results:  
  *      The return value is a pointer to a newly allocated TclRegexp  
  *      that represents the compiled pattern, or NULL if the pattern  
  *      could not be compiled.  If NULL is returned, an error message is  
  *      left in the interp's result.  
  *  
  * Side effects:  
  *      The thread-local regexp cache is updated and a new TclRegexp may  
  *      be allocated.  
  *  
  *----------------------------------------------------------------------  
  */  
   
 static TclRegexp *  
 CompileRegexp(interp, string, length, flags)  
     Tcl_Interp *interp;         /* Used for error reporting if not NULL. */  
     char *string;               /* The regexp to compile (UTF-8). */  
     int length;                 /* The length of the string in bytes. */  
     int flags;                  /* Compilation flags. */  
 {  
     TclRegexp *regexpPtr;  
     Tcl_UniChar *uniString;  
     int numChars;  
     Tcl_DString stringBuf;  
     int status, i;  
     ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);  
   
     if (!tsdPtr->initialized) {  
         tsdPtr->initialized = 1;  
         Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL);  
     }  
   
     /*  
      * This routine maintains a second-level regular expression cache in  
      * addition to the per-object regexp cache.  The per-thread cache is needed  
      * to handle the case where for various reasons the object is lost between  
      * invocations of the regexp command, but the literal pattern is the same.  
      */  
   
     /*  
      * Check the per-thread compiled regexp cache.  We can only reuse  
      * a regexp if it has the same pattern and the same flags.  
      */  
   
     for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {  
         if ((length == tsdPtr->patLengths[i])  
                 && (tsdPtr->regexps[i]->flags == flags)  
                 && (strcmp(string, tsdPtr->patterns[i]) == 0)) {  
             /*  
              * Move the matched pattern to the first slot in the  
              * cache and shift the other patterns down one position.  
              */  
   
             if (i != 0) {  
                 int j;  
                 char *cachedString;  
   
                 cachedString = tsdPtr->patterns[i];  
                 regexpPtr = tsdPtr->regexps[i];  
                 for (j = i-1; j >= 0; j--) {  
                     tsdPtr->patterns[j+1] = tsdPtr->patterns[j];  
                     tsdPtr->patLengths[j+1] = tsdPtr->patLengths[j];  
                     tsdPtr->regexps[j+1] = tsdPtr->regexps[j];  
                 }  
                 tsdPtr->patterns[0] = cachedString;  
                 tsdPtr->patLengths[0] = length;  
                 tsdPtr->regexps[0] = regexpPtr;  
             }  
             return tsdPtr->regexps[0];  
         }  
     }  
   
     /*  
      * This is a new expression, so compile it and add it to the cache.  
      */  
       
     regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));  
     regexpPtr->objPtr = NULL;  
     regexpPtr->string = NULL;  
     regexpPtr->details.rm_extend.rm_so = -1;  
     regexpPtr->details.rm_extend.rm_eo = -1;  
   
     /*  
      * Get the up-to-date string representation and map to unicode.  
      */  
   
     Tcl_DStringInit(&stringBuf);  
     uniString = Tcl_UtfToUniCharDString(string, length, &stringBuf);  
     numChars = Tcl_DStringLength(&stringBuf) / sizeof(Tcl_UniChar);  
   
     /*  
      * Compile the string and check for errors.  
      */  
   
     regexpPtr->flags = flags;  
     status = TclReComp(&regexpPtr->re, uniString, (size_t) numChars, flags);  
     Tcl_DStringFree(&stringBuf);  
   
     if (status != REG_OKAY) {  
         /*  
          * Clean up and report errors in the interpreter, if possible.  
          */  
   
         ckfree((char *)regexpPtr);  
         if (interp) {  
             TclRegError(interp,  
                     "couldn't compile regular expression pattern: ",  
                     status);  
         }  
         return NULL;  
     }  
   
     /*  
      * Allocate enough space for all of the subexpressions, plus one  
      * extra for the entire pattern.  
      */  
   
     regexpPtr->matches = (regmatch_t *) ckalloc(  
             sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));  
   
     /*  
      * Initialize the refcount to one initially, since it is in the cache.  
      */  
   
     regexpPtr->refCount = 1;  
   
     /*  
      * Free the last regexp, if necessary, and make room at the head of the  
      * list for the new regexp.  
      */  
   
     if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) {  
         TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1];  
         if (--(oldRegexpPtr->refCount) <= 0) {  
             FreeRegexp(oldRegexpPtr);  
         }  
         ckfree(tsdPtr->patterns[NUM_REGEXPS-1]);  
     }  
     for (i = NUM_REGEXPS - 2; i >= 0; i--) {  
         tsdPtr->patterns[i+1] = tsdPtr->patterns[i];  
         tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i];  
         tsdPtr->regexps[i+1] = tsdPtr->regexps[i];  
     }  
     tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1));  
     strcpy(tsdPtr->patterns[0], string);  
     tsdPtr->patLengths[0] = length;  
     tsdPtr->regexps[0] = regexpPtr;  
   
     return regexpPtr;  
 }  
   
 /*  
  *----------------------------------------------------------------------  
  *  
  * FreeRegexp --  
  *  
  *      Release the storage associated with a TclRegexp.  
  *  
  * Results:  
  *      None.  
  *  
  * Side effects:  
  *      None.  
  *  
  *----------------------------------------------------------------------  
  */  
   
 static void  
 FreeRegexp(regexpPtr)  
     TclRegexp *regexpPtr;       /* Compiled regular expression to free. */  
 {  
     TclReFree(&regexpPtr->re);  
     if (regexpPtr->matches) {  
         ckfree((char *) regexpPtr->matches);  
     }  
     ckfree((char *) regexpPtr);  
 }  
   
 /*  
  *----------------------------------------------------------------------  
  *  
  * FinalizeRegexp --  
  *  
  *      Release the storage associated with the per-thread regexp  
  *      cache.  
  *  
  * Results:  
  *      None.  
  *  
  * Side effects:  
  *      None.  
  *  
  *----------------------------------------------------------------------  
  */  
   
 static void  
 FinalizeRegexp(clientData)  
     ClientData clientData;      /* Not used. */  
 {  
     int i;  
     TclRegexp *regexpPtr;  
     ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);  
   
     for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {  
         regexpPtr = tsdPtr->regexps[i];  
         if (--(regexpPtr->refCount) <= 0) {  
             FreeRegexp(regexpPtr);  
         }  
         ckfree(tsdPtr->patterns[i]);  
     }  
 }  
   
   
 /* $History: tclregexp.c $  
  *  
  * *****************  Version 1  *****************  
  * User: Dtashley     Date: 1/02/01    Time: 1:04a  
  * Created in $/IjuScripter, IjuConsole/Source/Tcl Base  
  * Initial check-in.  
  */  
   
 /* End of TCLREGEXP.C */  
1    /* $Header$ */
2    /*
3     * tclRegexp.c --
4     *
5     *      This file contains the public interfaces to the Tcl regular
6     *      expression mechanism.
7     *
8     * Copyright (c) 1998 by Sun Microsystems, Inc.
9     * Copyright (c) 1998-1999 by Scriptics Corporation.
10     *
11     * See the file "license.terms" for information on usage and redistribution
12     * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
13     *
14     * RCS: @(#) $Id: tclregexp.c,v 1.1.1.1 2001/06/13 04:45:39 dtashley Exp $
15     */
16    
17    #include "tclInt.h"
18    #include "tclPort.h"
19    #include "tclRegexp.h"
20    
21    /*
22     *----------------------------------------------------------------------
23     * The routines in this file use Henry Spencer's regular expression
24     * package contained in the following additional source files:
25     *
26     *      regc_color.c    regc_cvec.c     regc_lex.c
27     *      regc_nfa.c      regcomp.c       regcustom.h
28     *      rege_dfa.c      regerror.c      regerrs.h
29     *      regex.h         regexec.c       regfree.c
30     *      regfronts.c     regguts.h
31     *
32     * Copyright (c) 1998 Henry Spencer.  All rights reserved.
33     *
34     * Development of this software was funded, in part, by Cray Research Inc.,
35     * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
36     * Corporation, none of whom are responsible for the results.  The author
37     * thanks all of them.
38     *
39     * Redistribution and use in source and binary forms -- with or without
40     * modification -- are permitted for any purpose, provided that
41     * redistributions in source form retain this entire copyright notice and
42     * indicate the origin and nature of any modifications.
43     *
44     * I'd appreciate being given credit for this package in the documentation
45     * of software which uses it, but that is not a requirement.
46     *
47     * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
48     * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
49     * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
50     * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
51     * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
52     * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
53     * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
54     * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
55     * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
56     * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
57     *
58     * *** NOTE: this code has been altered slightly for use in Tcl: ***
59     * *** 1. Names have been changed, e.g. from re_comp to          ***
60     * ***    TclRegComp, to avoid clashes with other                ***
61     * ***    regexp implementations used by applications.           ***
62     */
63    
64    /*
65     * Thread local storage used to maintain a per-thread cache of compiled
66     * regular expressions.
67     */
68    
69    #define NUM_REGEXPS 30
70    
71    typedef struct ThreadSpecificData {
72        int initialized;            /* Set to 1 when the module is initialized. */
73        char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled
74                                     * regular expression patterns.  NULL
75                                     * means that this slot isn't used.
76                                     * Malloc-ed. */
77        int patLengths[NUM_REGEXPS];/* Number of non-null characters in
78                                     * corresponding entry in patterns.
79                                     * -1 means entry isn't used. */
80        struct TclRegexp *regexps[NUM_REGEXPS];
81                                    /* Compiled forms of above strings.  Also
82                                     * malloc-ed, or NULL if not in use yet. */
83    } ThreadSpecificData;
84    
85    static Tcl_ThreadDataKey dataKey;
86    
87    /*
88     * Declarations for functions used only in this file.
89     */
90    
91    static TclRegexp *      CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp,
92                                char *pattern, int length, int flags));
93    static void             DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
94                                Tcl_Obj *copyPtr));
95    static void             FinalizeRegexp _ANSI_ARGS_((ClientData clientData));
96    static void             FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr));
97    static void             FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
98    static int              RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
99                                Tcl_RegExp re, CONST Tcl_UniChar *uniString,
100                                int numChars, int nmatches, int flags));
101    static int              SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp,
102                                Tcl_Obj *objPtr));
103    
104    /*
105     * The regular expression Tcl object type.  This serves as a cache
106     * of the compiled form of the regular expression.
107     */
108    
109    Tcl_ObjType tclRegexpType = {
110        "regexp",                           /* name */
111        FreeRegexpInternalRep,              /* freeIntRepProc */
112        DupRegexpInternalRep,               /* dupIntRepProc */
113        NULL,                               /* updateStringProc */
114        SetRegexpFromAny                    /* setFromAnyProc */
115    };
116    
117    
118    /*
119     *----------------------------------------------------------------------
120     *
121     * Tcl_RegExpCompile --
122     *
123     *      Compile a regular expression into a form suitable for fast
124     *      matching.  This procedure is DEPRECATED in favor of the
125     *      object version of the command.
126     *
127     * Results:
128     *      The return value is a pointer to the compiled form of string,
129     *      suitable for passing to Tcl_RegExpExec.  This compiled form
130     *      is only valid up until the next call to this procedure, so
131     *      don't keep these around for a long time!  If an error occurred
132     *      while compiling the pattern, then NULL is returned and an error
133     *      message is left in the interp's result.
134     *
135     * Side effects:
136     *      Updates the cache of compiled regexps.
137     *
138     *----------------------------------------------------------------------
139     */
140    
141    Tcl_RegExp
142    Tcl_RegExpCompile(interp, string)
143        Tcl_Interp *interp;         /* For use in error reporting and
144                                     * to access the interp regexp cache. */
145        char *string;               /* String for which to produce
146                                     * compiled regular expression. */
147    {
148        return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string),
149                REG_ADVANCED);
150    }
151    
152    /*
153     *----------------------------------------------------------------------
154     *
155     * Tcl_RegExpExec --
156     *
157     *      Execute the regular expression matcher using a compiled form
158     *      of a regular expression and save information about any match
159     *      that is found.
160     *
161     * Results:
162     *      If an error occurs during the matching operation then -1
163     *      is returned and the interp's result contains an error message.
164     *      Otherwise the return value is 1 if a matching range is
165     *      found and 0 if there is no matching range.
166     *
167     * Side effects:
168     *      None.
169     *
170     *----------------------------------------------------------------------
171     */
172    
173    int
174    Tcl_RegExpExec(interp, re, string, start)
175        Tcl_Interp *interp;         /* Interpreter to use for error reporting. */
176        Tcl_RegExp re;              /* Compiled regular expression;  must have
177                                     * been returned by previous call to
178                                     * Tcl_GetRegExpFromObj. */
179        CONST char *string;         /* String against which to match re. */
180        CONST char *start;          /* If string is part of a larger string,
181                                     * this identifies beginning of larger
182                                     * string, so that "^" won't match. */
183    {
184        int flags, result, numChars;
185        TclRegexp *regexp = (TclRegexp *)re;
186        Tcl_DString ds;
187        Tcl_UniChar *ustr;
188    
189        /*
190         * If the starting point is offset from the beginning of the buffer,
191         * then we need to tell the regexp engine not to match "^".
192         */
193    
194        if (string > start) {
195            flags = REG_NOTBOL;
196        } else {
197            flags = 0;
198        }
199    
200        /*
201         * Remember the string for use by Tcl_RegExpRange().
202         */
203    
204        regexp->string = string;
205        regexp->objPtr = NULL;
206    
207        /*
208         * Convert the string to Unicode and perform the match.
209         */
210    
211        Tcl_DStringInit(&ds);
212        ustr = Tcl_UtfToUniCharDString(string, -1, &ds);
213        numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar);
214        result = RegExpExecUniChar(interp, re, ustr, numChars,
215                -1 /* nmatches */, flags);
216        Tcl_DStringFree(&ds);
217    
218        return result;
219    }
220    
221    /*
222     *---------------------------------------------------------------------------
223     *
224     * Tcl_RegExpRange --
225     *
226     *      Returns pointers describing the range of a regular expression match,
227     *      or one of the subranges within the match.
228     *
229     * Results:
230     *      The variables at *startPtr and *endPtr are modified to hold the
231     *      addresses of the endpoints of the range given by index.  If the
232     *      specified range doesn't exist then NULLs are returned.
233     *
234     * Side effects:
235     *      None.
236     *
237     *---------------------------------------------------------------------------
238     */
239    
240    void
241    Tcl_RegExpRange(re, index, startPtr, endPtr)
242        Tcl_RegExp re;              /* Compiled regular expression that has
243                                     * been passed to Tcl_RegExpExec. */
244        int index;                  /* 0 means give the range of the entire
245                                     * match, > 0 means give the range of
246                                     * a matching subrange. */
247        char **startPtr;            /* Store address of first character in
248                                     * (sub-) range here. */
249        char **endPtr;              /* Store address of character just after last
250                                     * in (sub-) range here. */
251    {
252        TclRegexp *regexpPtr = (TclRegexp *) re;
253        CONST char *string;
254    
255        if ((size_t) index > regexpPtr->re.re_nsub) {
256            *startPtr = *endPtr = NULL;
257        } else if (regexpPtr->matches[index].rm_so < 0) {
258            *startPtr = *endPtr = NULL;
259        } else {
260            if (regexpPtr->objPtr) {
261                string = Tcl_GetString(regexpPtr->objPtr);
262            } else {
263                string = regexpPtr->string;
264            }
265            *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);
266            *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);
267        }
268    }
269    
270    /*
271     *---------------------------------------------------------------------------
272     *
273     * RegExpExecUniChar --
274     *
275     *      Execute the regular expression matcher using a compiled form of a
276     *      regular expression and save information about any match that is
277     *      found.
278     *
279     * Results:
280     *      If an error occurs during the matching operation then -1 is
281     *      returned and an error message is left in interp's result.
282     *      Otherwise the return value is 1 if a matching range was found or
283     *      0 if there was no matching range.
284     *
285     * Side effects:
286     *      None.
287     *
288     *----------------------------------------------------------------------
289     */
290    
291    static int
292    RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
293        Tcl_Interp *interp;         /* Interpreter to use for error reporting. */
294        Tcl_RegExp re;              /* Compiled regular expression; returned by
295                                     * a previous call to Tcl_GetRegExpFromObj */
296        CONST Tcl_UniChar *wString; /* String against which to match re. */
297        int numChars;               /* Length of Tcl_UniChar string (must
298                                     * be >= 0). */
299        int nmatches;               /* How many subexpression matches (counting
300                                     * the whole match as subexpression 0) are
301                                     * of interest.  -1 means "don't know". */
302        int flags;                  /* Regular expression flags. */
303    {
304        int status;
305        TclRegexp *regexpPtr = (TclRegexp *) re;
306        size_t last = regexpPtr->re.re_nsub + 1;
307        size_t nm = last;
308    
309        if (nmatches >= 0 && (size_t) nmatches < nm) {
310            nm = (size_t) nmatches;
311        }
312    
313        status = TclReExec(&regexpPtr->re, wString, (size_t) numChars,
314                &regexpPtr->details, nm, regexpPtr->matches, flags);
315    
316        /*
317         * Check for errors.
318         */
319    
320        if (status != REG_OKAY) {
321            if (status == REG_NOMATCH) {
322                return 0;
323            }
324            if (interp != NULL) {
325                TclRegError(interp, "error while matching regular expression: ",
326                        status);
327            }
328            return -1;
329        }
330        return 1;
331    }
332    
333    /*
334     *---------------------------------------------------------------------------
335     *
336     * TclRegExpRangeUniChar --
337     *
338     *      Returns pointers describing the range of a regular expression match,
339     *      or one of the subranges within the match, or the hypothetical range
340     *      represented by the rm_extend field of the rm_detail_t.
341     *
342     * Results:
343     *      The variables at *startPtr and *endPtr are modified to hold the
344     *      offsets of the endpoints of the range given by index.  If the
345     *      specified range doesn't exist then -1s are supplied.
346     *
347     * Side effects:
348     *      None.
349     *
350     *---------------------------------------------------------------------------
351     */
352    
353    void
354    TclRegExpRangeUniChar(re, index, startPtr, endPtr)
355        Tcl_RegExp re;              /* Compiled regular expression that has
356                                     * been passed to Tcl_RegExpExec. */
357        int index;                  /* 0 means give the range of the entire
358                                     * match, > 0 means give the range of
359                                     * a matching subrange, -1 means the
360                                     * range of the rm_extend field. */
361        int *startPtr;              /* Store address of first character in
362                                     * (sub-) range here. */
363        int *endPtr;                /* Store address of character just after last
364                                     * in (sub-) range here. */
365    {
366        TclRegexp *regexpPtr = (TclRegexp *) re;
367    
368        if ((regexpPtr->flags&REG_EXPECT) && index == -1) {
369            *startPtr = regexpPtr->details.rm_extend.rm_so;
370            *endPtr = regexpPtr->details.rm_extend.rm_eo;
371        } else if ((size_t) index > regexpPtr->re.re_nsub) {
372            *startPtr = -1;
373            *endPtr = -1;
374        } else {
375            *startPtr = regexpPtr->matches[index].rm_so;
376            *endPtr = regexpPtr->matches[index].rm_eo;
377        }
378    }
379    
380    /*
381     *----------------------------------------------------------------------
382     *
383     * Tcl_RegExpMatch --
384     *
385     *      See if a string matches a regular expression.
386     *
387     * Results:
388     *      If an error occurs during the matching operation then -1
389     *      is returned and the interp's result contains an error message.
390     *      Otherwise the return value is 1 if "string" matches "pattern"
391     *      and 0 otherwise.
392     *
393     * Side effects:
394     *      None.
395     *
396     *----------------------------------------------------------------------
397     */
398    
399    int
400    Tcl_RegExpMatch(interp, string, pattern)
401        Tcl_Interp *interp;         /* Used for error reporting. May be NULL. */
402        char *string;               /* String. */
403        char *pattern;              /* Regular expression to match against
404                                     * string. */
405    {
406        Tcl_RegExp re;
407    
408        re = Tcl_RegExpCompile(interp, pattern);
409        if (re == NULL) {
410            return -1;
411        }
412        return Tcl_RegExpExec(interp, re, string, string);
413    }
414    
415    /*
416     *----------------------------------------------------------------------
417     *
418     * Tcl_RegExpExecObj --
419     *
420     *      Execute a precompiled regexp against the given object.
421     *
422     * Results:
423     *      If an error occurs during the matching operation then -1
424     *      is returned and the interp's result contains an error message.
425     *      Otherwise the return value is 1 if "string" matches "pattern"
426     *      and 0 otherwise.
427     *
428     * Side effects:
429     *      Converts the object to a Unicode object.
430     *
431     *----------------------------------------------------------------------
432     */
433    
434    int
435    Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags)
436        Tcl_Interp *interp;         /* Interpreter to use for error reporting. */
437        Tcl_RegExp re;              /* Compiled regular expression;  must have
438                                     * been returned by previous call to
439                                     * Tcl_GetRegExpFromObj. */
440        Tcl_Obj *objPtr;            /* String against which to match re. */
441        int offset;                 /* Character index that marks where matching
442                                     * should begin. */
443        int nmatches;               /* How many subexpression matches (counting
444                                     * the whole match as subexpression 0) are
445                                     * of interest.  -1 means all of them. */
446        int flags;                  /* Regular expression execution flags. */
447    {
448        TclRegexp *regexpPtr = (TclRegexp *) re;
449        Tcl_UniChar *udata;
450        int length;
451    
452        /*
453         * Save the target object so we can extract strings from it later.
454         */
455    
456        regexpPtr->string = NULL;
457        regexpPtr->objPtr = objPtr;
458    
459        udata = Tcl_GetUnicode(objPtr);
460        length = Tcl_GetCharLength(objPtr);
461    
462        if (offset > length) {
463            offset = length;
464        }
465        udata += offset;
466        length -= offset;
467        
468        return RegExpExecUniChar(interp, re, udata, length, nmatches, flags);
469    }
470    
471    /*
472     *----------------------------------------------------------------------
473     *
474     * Tcl_RegExpMatchObj --
475     *
476     *      See if an object matches a regular expression.
477     *
478     * Results:
479     *      If an error occurs during the matching operation then -1
480     *      is returned and the interp's result contains an error message.
481     *      Otherwise the return value is 1 if "string" matches "pattern"
482     *      and 0 otherwise.
483     *
484     * Side effects:
485     *      Changes the internal rep of the pattern and string objects.
486     *
487     *----------------------------------------------------------------------
488     */
489    
490    int
491    Tcl_RegExpMatchObj(interp, stringObj, patternObj)
492        Tcl_Interp *interp;         /* Used for error reporting. May be NULL. */
493        Tcl_Obj *stringObj;         /* Object containing the String to search. */
494        Tcl_Obj *patternObj;        /* Regular expression to match against
495                                     * string. */
496    {
497        Tcl_RegExp re;
498    
499        re = Tcl_GetRegExpFromObj(interp, patternObj,
500                TCL_REG_ADVANCED | TCL_REG_NOSUB);
501        if (re == NULL) {
502            return -1;
503        }
504        return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */,
505                0 /* nmatches */, 0 /* flags */);
506    }
507    
508    /*
509     *----------------------------------------------------------------------
510     *
511     * Tcl_RegExpGetInfo --
512     *
513     *      Retrieve information about the current match.
514     *
515     * Results:
516     *      None.
517     *
518     * Side effects:
519     *      None.
520     *
521     *----------------------------------------------------------------------
522     */
523    
524    void
525    Tcl_RegExpGetInfo(regexp, infoPtr)
526        Tcl_RegExp regexp;          /* Pattern from which to get subexpressions. */
527        Tcl_RegExpInfo *infoPtr;    /* Match information is stored here.  */
528    {
529        TclRegexp *regexpPtr = (TclRegexp *) regexp;
530    
531        infoPtr->nsubs = regexpPtr->re.re_nsub;
532        infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;
533        infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;
534    }
535    
536    /*
537     *----------------------------------------------------------------------
538     *
539     * Tcl_GetRegExpFromObj --
540     *
541     *      Compile a regular expression into a form suitable for fast
542     *      matching.  This procedure caches the result in a Tcl_Obj.
543     *
544     * Results:
545     *      The return value is a pointer to the compiled form of string,
546     *      suitable for passing to Tcl_RegExpExec.  If an error occurred
547     *      while compiling the pattern, then NULL is returned and an error
548     *      message is left in the interp's result.
549     *
550     * Side effects:
551     *      Updates the native rep of the Tcl_Obj.
552     *
553     *----------------------------------------------------------------------
554     */
555    
556    Tcl_RegExp
557    Tcl_GetRegExpFromObj(interp, objPtr, flags)
558        Tcl_Interp *interp;         /* For use in error reporting, and to access
559                                     * the interp regexp cache. */
560        Tcl_Obj *objPtr;            /* Object whose string rep contains regular
561                                     * expression pattern.  Internal rep will be
562                                     * changed to compiled form of this regular
563                                     * expression. */
564        int flags;                  /* Regular expression compilation flags. */
565    {
566        int length;
567        Tcl_ObjType *typePtr;
568        TclRegexp *regexpPtr;
569        char *pattern;
570    
571        typePtr = objPtr->typePtr;
572        regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
573    
574        if ((typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) {
575            pattern = Tcl_GetStringFromObj(objPtr, &length);
576    
577            regexpPtr = CompileRegexp(interp, pattern, length, flags);
578            if (regexpPtr == NULL) {
579                return NULL;
580            }
581    
582            /*
583             * Add a reference to the regexp so it will persist even if it is
584             * pushed out of the current thread's regexp cache.  This reference
585             * will be removed when the object's internal rep is freed.
586             */
587    
588            regexpPtr->refCount++;
589    
590            /*
591             * Free the old representation and set our type.
592             */
593    
594            if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) {
595                (*typePtr->freeIntRepProc)(objPtr);
596            }
597            objPtr->internalRep.otherValuePtr = (VOID *) regexpPtr;
598            objPtr->typePtr = &tclRegexpType;
599        }
600        return (Tcl_RegExp) regexpPtr;
601    }
602    
603    /*
604     *----------------------------------------------------------------------
605     *
606     * TclRegAbout --
607     *
608     *      Return information about a compiled regular expression.
609     *
610     * Results:
611     *      The return value is -1 for failure, 0 for success, although at
612     *      the moment there's nothing that could fail.  On success, a list
613     *      is left in the interp's result:  first element is the subexpression
614     *      count, second is a list of re_info bit names.
615     *
616     * Side effects:
617     *      None.
618     *
619     *----------------------------------------------------------------------
620     */
621    
622    int
623    TclRegAbout(interp, re)
624        Tcl_Interp *interp;         /* For use in variable assignment. */
625        Tcl_RegExp re;              /* The compiled regular expression. */
626    {
627        TclRegexp *regexpPtr = (TclRegexp *)re;
628        char buf[TCL_INTEGER_SPACE];
629        static struct infoname {
630            int bit;
631            char *text;
632        } infonames[] = {
633            {REG_UBACKREF,          "REG_UBACKREF"},
634            {REG_ULOOKAHEAD,        "REG_ULOOKAHEAD"},
635            {REG_UBOUNDS,           "REG_UBOUNDS"},
636            {REG_UBRACES,           "REG_UBRACES"},
637            {REG_UBSALNUM,          "REG_UBSALNUM"},
638            {REG_UPBOTCH,           "REG_UPBOTCH"},
639            {REG_UBBS,              "REG_UBBS"},
640            {REG_UNONPOSIX,         "REG_UNONPOSIX"},
641            {REG_UUNSPEC,           "REG_UUNSPEC"},
642            {REG_UUNPORT,           "REG_UUNPORT"},
643            {REG_ULOCALE,           "REG_ULOCALE"},
644            {REG_UEMPTYMATCH,       "REG_UEMPTYMATCH"},
645            {REG_UIMPOSSIBLE,       "REG_UIMPOSSIBLE"},
646            {REG_USHORTEST,         "REG_USHORTEST"},
647            {0,                     ""}
648        };
649        struct infoname *inf;
650        int n;
651    
652        Tcl_ResetResult(interp);
653    
654        sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub));
655        Tcl_AppendElement(interp, buf);
656    
657        /*
658         * Must count bits before generating list, because we must know
659         * whether {} are needed before we start appending names.
660         */
661        n = 0;
662        for (inf = infonames; inf->bit != 0; inf++) {
663            if (regexpPtr->re.re_info&inf->bit) {
664                n++;
665            }
666        }
667        if (n != 1) {
668            Tcl_AppendResult(interp, " {", NULL);
669        }
670        for (inf = infonames; inf->bit != 0; inf++) {
671            if (regexpPtr->re.re_info&inf->bit) {
672                Tcl_AppendElement(interp, inf->text);
673            }
674        }
675        if (n != 1) {
676            Tcl_AppendResult(interp, "}", NULL);
677        }
678    
679        return 0;
680    }
681    
682    /*
683     *----------------------------------------------------------------------
684     *
685     * TclRegError --
686     *
687     *      Generate an error message based on the regexp status code.
688     *
689     * Results:
690     *      Places an error in the interpreter.
691     *
692     * Side effects:
693     *      Sets errorCode as well.
694     *
695     *----------------------------------------------------------------------
696     */
697    
698    void
699    TclRegError(interp, msg, status)
700        Tcl_Interp *interp;         /* Interpreter for error reporting. */
701        char *msg;                  /* Message to prepend to error. */
702        int status;                 /* Status code to report. */
703    {
704        char buf[100];              /* ample in practice */
705        char cbuf[100];             /* lots in practice */
706        size_t n;
707        char *p;
708    
709        Tcl_ResetResult(interp);
710        n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf));
711        p = (n > sizeof(buf)) ? "..." : "";
712        Tcl_AppendResult(interp, msg, buf, p, NULL);
713    
714        sprintf(cbuf, "%d", status);
715        (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
716        Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
717    }
718    
719    
720    /*
721     *----------------------------------------------------------------------
722     *
723     * FreeRegexpInternalRep --
724     *
725     *      Deallocate the storage associated with a regexp object's internal
726     *      representation.
727     *
728     * Results:
729     *      None.
730     *
731     * Side effects:
732     *      Frees the compiled regular expression.
733     *
734     *----------------------------------------------------------------------
735     */
736    
737    static void
738    FreeRegexpInternalRep(objPtr)
739        Tcl_Obj *objPtr;            /* Regexp object with internal rep to free. */
740    {
741        TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
742    
743        /*
744         * If this is the last reference to the regexp, free it.
745         */
746    
747        if (--(regexpRepPtr->refCount) <= 0) {
748            FreeRegexp(regexpRepPtr);
749        }
750    }
751    
752    /*
753     *----------------------------------------------------------------------
754     *
755     * DupRegexpInternalRep --
756     *
757     *      We copy the reference to the compiled regexp and bump its
758     *      reference count.
759     *
760     * Results:
761     *      None.
762     *
763     * Side effects:
764     *      Increments the reference count of the regexp.
765     *
766     *----------------------------------------------------------------------
767     */
768    
769    static void
770    DupRegexpInternalRep(srcPtr, copyPtr)
771        Tcl_Obj *srcPtr;            /* Object with internal rep to copy. */
772        Tcl_Obj *copyPtr;           /* Object with internal rep to set. */
773    {
774        TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr;
775        regexpPtr->refCount++;
776        copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr;
777        copyPtr->typePtr = &tclRegexpType;
778    }
779    
780    /*
781     *----------------------------------------------------------------------
782     *
783     * SetRegexpFromAny --
784     *
785     *      Attempt to generate a compiled regular expression for the Tcl object
786     *      "objPtr".
787     *
788     * Results:
789     *      The return value is TCL_OK or TCL_ERROR. If an error occurs during
790     *      conversion, an error message is left in the interpreter's result
791     *      unless "interp" is NULL.
792     *
793     * Side effects:
794     *      If no error occurs, a regular expression is stored as "objPtr"s
795     *      internal representation.
796     *
797     *----------------------------------------------------------------------
798     */
799    
800    static int
801    SetRegexpFromAny(interp, objPtr)
802        Tcl_Interp *interp;         /* Used for error reporting if not NULL. */
803        Tcl_Obj *objPtr;            /* The object to convert. */
804    {
805        if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) {
806            return TCL_ERROR;
807        }
808        return TCL_OK;
809    }
810    
811    /*
812     *---------------------------------------------------------------------------
813     *
814     * CompileRegexp --
815     *
816     *      Attempt to compile the given regexp pattern.  If the compiled
817     *      regular expression can be found in the per-thread cache, it
818     *      will be used instead of compiling a new copy.
819     *
820     * Results:
821     *      The return value is a pointer to a newly allocated TclRegexp
822     *      that represents the compiled pattern, or NULL if the pattern
823     *      could not be compiled.  If NULL is returned, an error message is
824     *      left in the interp's result.
825     *
826     * Side effects:
827     *      The thread-local regexp cache is updated and a new TclRegexp may
828     *      be allocated.
829     *
830     *----------------------------------------------------------------------
831     */
832    
833    static TclRegexp *
834    CompileRegexp(interp, string, length, flags)
835        Tcl_Interp *interp;         /* Used for error reporting if not NULL. */
836        char *string;               /* The regexp to compile (UTF-8). */
837        int length;                 /* The length of the string in bytes. */
838        int flags;                  /* Compilation flags. */
839    {
840        TclRegexp *regexpPtr;
841        Tcl_UniChar *uniString;
842        int numChars;
843        Tcl_DString stringBuf;
844        int status, i;
845        ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
846    
847        if (!tsdPtr->initialized) {
848            tsdPtr->initialized = 1;
849            Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL);
850        }
851    
852        /*
853         * This routine maintains a second-level regular expression cache in
854         * addition to the per-object regexp cache.  The per-thread cache is needed
855         * to handle the case where for various reasons the object is lost between
856         * invocations of the regexp command, but the literal pattern is the same.
857         */
858    
859        /*
860         * Check the per-thread compiled regexp cache.  We can only reuse
861         * a regexp if it has the same pattern and the same flags.
862         */
863    
864        for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
865            if ((length == tsdPtr->patLengths[i])
866                    && (tsdPtr->regexps[i]->flags == flags)
867                    && (strcmp(string, tsdPtr->patterns[i]) == 0)) {
868                /*
869                 * Move the matched pattern to the first slot in the
870                 * cache and shift the other patterns down one position.
871                 */
872    
873                if (i != 0) {
874                    int j;
875                    char *cachedString;
876    
877                    cachedString = tsdPtr->patterns[i];
878                    regexpPtr = tsdPtr->regexps[i];
879                    for (j = i-1; j >= 0; j--) {
880                        tsdPtr->patterns[j+1] = tsdPtr->patterns[j];
881                        tsdPtr->patLengths[j+1] = tsdPtr->patLengths[j];
882                        tsdPtr->regexps[j+1] = tsdPtr->regexps[j];
883                    }
884                    tsdPtr->patterns[0] = cachedString;
885                    tsdPtr->patLengths[0] = length;
886                    tsdPtr->regexps[0] = regexpPtr;
887                }
888                return tsdPtr->regexps[0];
889            }
890        }
891    
892        /*
893         * This is a new expression, so compile it and add it to the cache.
894         */
895        
896        regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
897        regexpPtr->objPtr = NULL;
898        regexpPtr->string = NULL;
899        regexpPtr->details.rm_extend.rm_so = -1;
900        regexpPtr->details.rm_extend.rm_eo = -1;
901    
902        /*
903         * Get the up-to-date string representation and map to unicode.
904         */
905    
906        Tcl_DStringInit(&stringBuf);
907        uniString = Tcl_UtfToUniCharDString(string, length, &stringBuf);
908        numChars = Tcl_DStringLength(&stringBuf) / sizeof(Tcl_UniChar);
909    
910        /*
911         * Compile the string and check for errors.
912         */
913    
914        regexpPtr->flags = flags;
915        status = TclReComp(&regexpPtr->re, uniString, (size_t) numChars, flags);
916        Tcl_DStringFree(&stringBuf);
917    
918        if (status != REG_OKAY) {
919            /*
920             * Clean up and report errors in the interpreter, if possible.
921             */
922    
923            ckfree((char *)regexpPtr);
924            if (interp) {
925                TclRegError(interp,
926                        "couldn't compile regular expression pattern: ",
927                        status);
928            }
929            return NULL;
930        }
931    
932        /*
933         * Allocate enough space for all of the subexpressions, plus one
934         * extra for the entire pattern.
935         */
936    
937        regexpPtr->matches = (regmatch_t *) ckalloc(
938                sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
939    
940        /*
941         * Initialize the refcount to one initially, since it is in the cache.
942         */
943    
944        regexpPtr->refCount = 1;
945    
946        /*
947         * Free the last regexp, if necessary, and make room at the head of the
948         * list for the new regexp.
949         */
950    
951        if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) {
952            TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1];
953            if (--(oldRegexpPtr->refCount) <= 0) {
954                FreeRegexp(oldRegexpPtr);
955            }
956            ckfree(tsdPtr->patterns[NUM_REGEXPS-1]);
957        }
958        for (i = NUM_REGEXPS - 2; i >= 0; i--) {
959            tsdPtr->patterns[i+1] = tsdPtr->patterns[i];
960            tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i];
961            tsdPtr->regexps[i+1] = tsdPtr->regexps[i];
962        }
963        tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1));
964        strcpy(tsdPtr->patterns[0], string);
965        tsdPtr->patLengths[0] = length;
966        tsdPtr->regexps[0] = regexpPtr;
967    
968        return regexpPtr;
969    }
970    
971    /*
972     *----------------------------------------------------------------------
973     *
974     * FreeRegexp --
975     *
976     *      Release the storage associated with a TclRegexp.
977     *
978     * Results:
979     *      None.
980     *
981     * Side effects:
982     *      None.
983     *
984     *----------------------------------------------------------------------
985     */
986    
987    static void
988    FreeRegexp(regexpPtr)
989        TclRegexp *regexpPtr;       /* Compiled regular expression to free. */
990    {
991        TclReFree(&regexpPtr->re);
992        if (regexpPtr->matches) {
993            ckfree((char *) regexpPtr->matches);
994        }
995        ckfree((char *) regexpPtr);
996    }
997    
998    /*
999     *----------------------------------------------------------------------
1000     *
1001     * FinalizeRegexp --
1002     *
1003     *      Release the storage associated with the per-thread regexp
1004     *      cache.
1005     *
1006     * Results:
1007     *      None.
1008     *
1009     * Side effects:
1010     *      None.
1011     *
1012     *----------------------------------------------------------------------
1013     */
1014    
1015    static void
1016    FinalizeRegexp(clientData)
1017        ClientData clientData;      /* Not used. */
1018    {
1019        int i;
1020        TclRegexp *regexpPtr;
1021        ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
1022    
1023        for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
1024            regexpPtr = tsdPtr->regexps[i];
1025            if (--(regexpPtr->refCount) <= 0) {
1026                FreeRegexp(regexpPtr);
1027            }
1028            ckfree(tsdPtr->patterns[i]);
1029        }
1030    }
1031    
1032    /* End of tclregexp.c */

Legend:
Removed from v.29  
changed lines
  Added in v.71

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25