/[dtapublic]/projs/trunk/shared_source/c_tcl_base_7_5_w_mods/tclregexp.c
ViewVC logotype

Diff of /projs/trunk/shared_source/c_tcl_base_7_5_w_mods/tclregexp.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 67 by dashley, Mon Oct 31 00:57:34 2016 UTC revision 71 by dashley, Sat Nov 5 11:07:06 2016 UTC
# Line 1  Line 1 
1  /* $Header$ */  /* $Header$ */
2  /*  /*
3   * tclRegexp.c --   * tclRegexp.c --
4   *   *
5   *      This file contains the public interfaces to the Tcl regular   *      This file contains the public interfaces to the Tcl regular
6   *      expression mechanism.   *      expression mechanism.
7   *   *
8   * Copyright (c) 1998 by Sun Microsystems, Inc.   * Copyright (c) 1998 by Sun Microsystems, Inc.
9   * Copyright (c) 1998-1999 by Scriptics Corporation.   * Copyright (c) 1998-1999 by Scriptics Corporation.
10   *   *
11   * See the file "license.terms" for information on usage and redistribution   * See the file "license.terms" for information on usage and redistribution
12   * of this file, and for a DISCLAIMER OF ALL WARRANTIES.   * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
13   *   *
14   * RCS: @(#) $Id: tclregexp.c,v 1.1.1.1 2001/06/13 04:45:39 dtashley Exp $   * RCS: @(#) $Id: tclregexp.c,v 1.1.1.1 2001/06/13 04:45:39 dtashley Exp $
15   */   */
16    
17  #include "tclInt.h"  #include "tclInt.h"
18  #include "tclPort.h"  #include "tclPort.h"
19  #include "tclRegexp.h"  #include "tclRegexp.h"
20    
21  /*  /*
22   *----------------------------------------------------------------------   *----------------------------------------------------------------------
23   * The routines in this file use Henry Spencer's regular expression   * The routines in this file use Henry Spencer's regular expression
24   * package contained in the following additional source files:   * package contained in the following additional source files:
25   *   *
26   *      regc_color.c    regc_cvec.c     regc_lex.c   *      regc_color.c    regc_cvec.c     regc_lex.c
27   *      regc_nfa.c      regcomp.c       regcustom.h   *      regc_nfa.c      regcomp.c       regcustom.h
28   *      rege_dfa.c      regerror.c      regerrs.h   *      rege_dfa.c      regerror.c      regerrs.h
29   *      regex.h         regexec.c       regfree.c   *      regex.h         regexec.c       regfree.c
30   *      regfronts.c     regguts.h   *      regfronts.c     regguts.h
31   *   *
32   * Copyright (c) 1998 Henry Spencer.  All rights reserved.   * Copyright (c) 1998 Henry Spencer.  All rights reserved.
33   *   *
34   * Development of this software was funded, in part, by Cray Research Inc.,   * Development of this software was funded, in part, by Cray Research Inc.,
35   * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics   * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
36   * Corporation, none of whom are responsible for the results.  The author   * Corporation, none of whom are responsible for the results.  The author
37   * thanks all of them.   * thanks all of them.
38   *   *
39   * Redistribution and use in source and binary forms -- with or without   * Redistribution and use in source and binary forms -- with or without
40   * modification -- are permitted for any purpose, provided that   * modification -- are permitted for any purpose, provided that
41   * redistributions in source form retain this entire copyright notice and   * redistributions in source form retain this entire copyright notice and
42   * indicate the origin and nature of any modifications.   * indicate the origin and nature of any modifications.
43   *   *
44   * I'd appreciate being given credit for this package in the documentation   * I'd appreciate being given credit for this package in the documentation
45   * of software which uses it, but that is not a requirement.   * of software which uses it, but that is not a requirement.
46   *   *
47   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
48   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
49   * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL   * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
50   * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,   * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
51   * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,   * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
52   * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;   * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
53   * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,   * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
54   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
55   * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF   * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
56   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
57   *   *
58   * *** NOTE: this code has been altered slightly for use in Tcl: ***   * *** NOTE: this code has been altered slightly for use in Tcl: ***
59   * *** 1. Names have been changed, e.g. from re_comp to          ***   * *** 1. Names have been changed, e.g. from re_comp to          ***
60   * ***    TclRegComp, to avoid clashes with other                ***   * ***    TclRegComp, to avoid clashes with other                ***
61   * ***    regexp implementations used by applications.           ***   * ***    regexp implementations used by applications.           ***
62   */   */
63    
64  /*  /*
65   * Thread local storage used to maintain a per-thread cache of compiled   * Thread local storage used to maintain a per-thread cache of compiled
66   * regular expressions.   * regular expressions.
67   */   */
68    
69  #define NUM_REGEXPS 30  #define NUM_REGEXPS 30
70    
71  typedef struct ThreadSpecificData {  typedef struct ThreadSpecificData {
72      int initialized;            /* Set to 1 when the module is initialized. */      int initialized;            /* Set to 1 when the module is initialized. */
73      char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled      char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled
74                                   * regular expression patterns.  NULL                                   * regular expression patterns.  NULL
75                                   * means that this slot isn't used.                                   * means that this slot isn't used.
76                                   * Malloc-ed. */                                   * Malloc-ed. */
77      int patLengths[NUM_REGEXPS];/* Number of non-null characters in      int patLengths[NUM_REGEXPS];/* Number of non-null characters in
78                                   * corresponding entry in patterns.                                   * corresponding entry in patterns.
79                                   * -1 means entry isn't used. */                                   * -1 means entry isn't used. */
80      struct TclRegexp *regexps[NUM_REGEXPS];      struct TclRegexp *regexps[NUM_REGEXPS];
81                                  /* Compiled forms of above strings.  Also                                  /* Compiled forms of above strings.  Also
82                                   * malloc-ed, or NULL if not in use yet. */                                   * malloc-ed, or NULL if not in use yet. */
83  } ThreadSpecificData;  } ThreadSpecificData;
84    
85  static Tcl_ThreadDataKey dataKey;  static Tcl_ThreadDataKey dataKey;
86    
87  /*  /*
88   * Declarations for functions used only in this file.   * Declarations for functions used only in this file.
89   */   */
90    
91  static TclRegexp *      CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp,  static TclRegexp *      CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp,
92                              char *pattern, int length, int flags));                              char *pattern, int length, int flags));
93  static void             DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,  static void             DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
94                              Tcl_Obj *copyPtr));                              Tcl_Obj *copyPtr));
95  static void             FinalizeRegexp _ANSI_ARGS_((ClientData clientData));  static void             FinalizeRegexp _ANSI_ARGS_((ClientData clientData));
96  static void             FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr));  static void             FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr));
97  static void             FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));  static void             FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
98  static int              RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,  static int              RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,
99                              Tcl_RegExp re, CONST Tcl_UniChar *uniString,                              Tcl_RegExp re, CONST Tcl_UniChar *uniString,
100                              int numChars, int nmatches, int flags));                              int numChars, int nmatches, int flags));
101  static int              SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp,  static int              SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp,
102                              Tcl_Obj *objPtr));                              Tcl_Obj *objPtr));
103    
104  /*  /*
105   * The regular expression Tcl object type.  This serves as a cache   * The regular expression Tcl object type.  This serves as a cache
106   * of the compiled form of the regular expression.   * of the compiled form of the regular expression.
107   */   */
108    
109  Tcl_ObjType tclRegexpType = {  Tcl_ObjType tclRegexpType = {
110      "regexp",                           /* name */      "regexp",                           /* name */
111      FreeRegexpInternalRep,              /* freeIntRepProc */      FreeRegexpInternalRep,              /* freeIntRepProc */
112      DupRegexpInternalRep,               /* dupIntRepProc */      DupRegexpInternalRep,               /* dupIntRepProc */
113      NULL,                               /* updateStringProc */      NULL,                               /* updateStringProc */
114      SetRegexpFromAny                    /* setFromAnyProc */      SetRegexpFromAny                    /* setFromAnyProc */
115  };  };
116    
117    
118  /*  /*
119   *----------------------------------------------------------------------   *----------------------------------------------------------------------
120   *   *
121   * Tcl_RegExpCompile --   * Tcl_RegExpCompile --
122   *   *
123   *      Compile a regular expression into a form suitable for fast   *      Compile a regular expression into a form suitable for fast
124   *      matching.  This procedure is DEPRECATED in favor of the   *      matching.  This procedure is DEPRECATED in favor of the
125   *      object version of the command.   *      object version of the command.
126   *   *
127   * Results:   * Results:
128   *      The return value is a pointer to the compiled form of string,   *      The return value is a pointer to the compiled form of string,
129   *      suitable for passing to Tcl_RegExpExec.  This compiled form   *      suitable for passing to Tcl_RegExpExec.  This compiled form
130   *      is only valid up until the next call to this procedure, so   *      is only valid up until the next call to this procedure, so
131   *      don't keep these around for a long time!  If an error occurred   *      don't keep these around for a long time!  If an error occurred
132   *      while compiling the pattern, then NULL is returned and an error   *      while compiling the pattern, then NULL is returned and an error
133   *      message is left in the interp's result.   *      message is left in the interp's result.
134   *   *
135   * Side effects:   * Side effects:
136   *      Updates the cache of compiled regexps.   *      Updates the cache of compiled regexps.
137   *   *
138   *----------------------------------------------------------------------   *----------------------------------------------------------------------
139   */   */
140    
141  Tcl_RegExp  Tcl_RegExp
142  Tcl_RegExpCompile(interp, string)  Tcl_RegExpCompile(interp, string)
143      Tcl_Interp *interp;         /* For use in error reporting and      Tcl_Interp *interp;         /* For use in error reporting and
144                                   * to access the interp regexp cache. */                                   * to access the interp regexp cache. */
145      char *string;               /* String for which to produce      char *string;               /* String for which to produce
146                                   * compiled regular expression. */                                   * compiled regular expression. */
147  {  {
148      return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string),      return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string),
149              REG_ADVANCED);              REG_ADVANCED);
150  }  }
151    
152  /*  /*
153   *----------------------------------------------------------------------   *----------------------------------------------------------------------
154   *   *
155   * Tcl_RegExpExec --   * Tcl_RegExpExec --
156   *   *
157   *      Execute the regular expression matcher using a compiled form   *      Execute the regular expression matcher using a compiled form
158   *      of a regular expression and save information about any match   *      of a regular expression and save information about any match
159   *      that is found.   *      that is found.
160   *   *
161   * Results:   * Results:
162   *      If an error occurs during the matching operation then -1   *      If an error occurs during the matching operation then -1
163   *      is returned and the interp's result contains an error message.   *      is returned and the interp's result contains an error message.
164   *      Otherwise the return value is 1 if a matching range is   *      Otherwise the return value is 1 if a matching range is
165   *      found and 0 if there is no matching range.   *      found and 0 if there is no matching range.
166   *   *
167   * Side effects:   * Side effects:
168   *      None.   *      None.
169   *   *
170   *----------------------------------------------------------------------   *----------------------------------------------------------------------
171   */   */
172    
173  int  int
174  Tcl_RegExpExec(interp, re, string, start)  Tcl_RegExpExec(interp, re, string, start)
175      Tcl_Interp *interp;         /* Interpreter to use for error reporting. */      Tcl_Interp *interp;         /* Interpreter to use for error reporting. */
176      Tcl_RegExp re;              /* Compiled regular expression;  must have      Tcl_RegExp re;              /* Compiled regular expression;  must have
177                                   * been returned by previous call to                                   * been returned by previous call to
178                                   * Tcl_GetRegExpFromObj. */                                   * Tcl_GetRegExpFromObj. */
179      CONST char *string;         /* String against which to match re. */      CONST char *string;         /* String against which to match re. */
180      CONST char *start;          /* If string is part of a larger string,      CONST char *start;          /* If string is part of a larger string,
181                                   * this identifies beginning of larger                                   * this identifies beginning of larger
182                                   * string, so that "^" won't match. */                                   * string, so that "^" won't match. */
183  {  {
184      int flags, result, numChars;      int flags, result, numChars;
185      TclRegexp *regexp = (TclRegexp *)re;      TclRegexp *regexp = (TclRegexp *)re;
186      Tcl_DString ds;      Tcl_DString ds;
187      Tcl_UniChar *ustr;      Tcl_UniChar *ustr;
188    
189      /*      /*
190       * If the starting point is offset from the beginning of the buffer,       * If the starting point is offset from the beginning of the buffer,
191       * then we need to tell the regexp engine not to match "^".       * then we need to tell the regexp engine not to match "^".
192       */       */
193    
194      if (string > start) {      if (string > start) {
195          flags = REG_NOTBOL;          flags = REG_NOTBOL;
196      } else {      } else {
197          flags = 0;          flags = 0;
198      }      }
199    
200      /*      /*
201       * Remember the string for use by Tcl_RegExpRange().       * Remember the string for use by Tcl_RegExpRange().
202       */       */
203    
204      regexp->string = string;      regexp->string = string;
205      regexp->objPtr = NULL;      regexp->objPtr = NULL;
206    
207      /*      /*
208       * Convert the string to Unicode and perform the match.       * Convert the string to Unicode and perform the match.
209       */       */
210    
211      Tcl_DStringInit(&ds);      Tcl_DStringInit(&ds);
212      ustr = Tcl_UtfToUniCharDString(string, -1, &ds);      ustr = Tcl_UtfToUniCharDString(string, -1, &ds);
213      numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar);      numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar);
214      result = RegExpExecUniChar(interp, re, ustr, numChars,      result = RegExpExecUniChar(interp, re, ustr, numChars,
215              -1 /* nmatches */, flags);              -1 /* nmatches */, flags);
216      Tcl_DStringFree(&ds);      Tcl_DStringFree(&ds);
217    
218      return result;      return result;
219  }  }
220    
221  /*  /*
222   *---------------------------------------------------------------------------   *---------------------------------------------------------------------------
223   *   *
224   * Tcl_RegExpRange --   * Tcl_RegExpRange --
225   *   *
226   *      Returns pointers describing the range of a regular expression match,   *      Returns pointers describing the range of a regular expression match,
227   *      or one of the subranges within the match.   *      or one of the subranges within the match.
228   *   *
229   * Results:   * Results:
230   *      The variables at *startPtr and *endPtr are modified to hold the   *      The variables at *startPtr and *endPtr are modified to hold the
231   *      addresses of the endpoints of the range given by index.  If the   *      addresses of the endpoints of the range given by index.  If the
232   *      specified range doesn't exist then NULLs are returned.   *      specified range doesn't exist then NULLs are returned.
233   *   *
234   * Side effects:   * Side effects:
235   *      None.   *      None.
236   *   *
237   *---------------------------------------------------------------------------   *---------------------------------------------------------------------------
238   */   */
239    
240  void  void
241  Tcl_RegExpRange(re, index, startPtr, endPtr)  Tcl_RegExpRange(re, index, startPtr, endPtr)
242      Tcl_RegExp re;              /* Compiled regular expression that has      Tcl_RegExp re;              /* Compiled regular expression that has
243                                   * been passed to Tcl_RegExpExec. */                                   * been passed to Tcl_RegExpExec. */
244      int index;                  /* 0 means give the range of the entire      int index;                  /* 0 means give the range of the entire
245                                   * match, > 0 means give the range of                                   * match, > 0 means give the range of
246                                   * a matching subrange. */                                   * a matching subrange. */
247      char **startPtr;            /* Store address of first character in      char **startPtr;            /* Store address of first character in
248                                   * (sub-) range here. */                                   * (sub-) range here. */
249      char **endPtr;              /* Store address of character just after last      char **endPtr;              /* Store address of character just after last
250                                   * in (sub-) range here. */                                   * in (sub-) range here. */
251  {  {
252      TclRegexp *regexpPtr = (TclRegexp *) re;      TclRegexp *regexpPtr = (TclRegexp *) re;
253      CONST char *string;      CONST char *string;
254    
255      if ((size_t) index > regexpPtr->re.re_nsub) {      if ((size_t) index > regexpPtr->re.re_nsub) {
256          *startPtr = *endPtr = NULL;          *startPtr = *endPtr = NULL;
257      } else if (regexpPtr->matches[index].rm_so < 0) {      } else if (regexpPtr->matches[index].rm_so < 0) {
258          *startPtr = *endPtr = NULL;          *startPtr = *endPtr = NULL;
259      } else {      } else {
260          if (regexpPtr->objPtr) {          if (regexpPtr->objPtr) {
261              string = Tcl_GetString(regexpPtr->objPtr);              string = Tcl_GetString(regexpPtr->objPtr);
262          } else {          } else {
263              string = regexpPtr->string;              string = regexpPtr->string;
264          }          }
265          *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);          *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);
266          *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);          *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);
267      }      }
268  }  }
269    
270  /*  /*
271   *---------------------------------------------------------------------------   *---------------------------------------------------------------------------
272   *   *
273   * RegExpExecUniChar --   * RegExpExecUniChar --
274   *   *
275   *      Execute the regular expression matcher using a compiled form of a   *      Execute the regular expression matcher using a compiled form of a
276   *      regular expression and save information about any match that is   *      regular expression and save information about any match that is
277   *      found.   *      found.
278   *   *
279   * Results:   * Results:
280   *      If an error occurs during the matching operation then -1 is   *      If an error occurs during the matching operation then -1 is
281   *      returned and an error message is left in interp's result.   *      returned and an error message is left in interp's result.
282   *      Otherwise the return value is 1 if a matching range was found or   *      Otherwise the return value is 1 if a matching range was found or
283   *      0 if there was no matching range.   *      0 if there was no matching range.
284   *   *
285   * Side effects:   * Side effects:
286   *      None.   *      None.
287   *   *
288   *----------------------------------------------------------------------   *----------------------------------------------------------------------
289   */   */
290    
291  static int  static int
292  RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)  RegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)
293      Tcl_Interp *interp;         /* Interpreter to use for error reporting. */      Tcl_Interp *interp;         /* Interpreter to use for error reporting. */
294      Tcl_RegExp re;              /* Compiled regular expression; returned by      Tcl_RegExp re;              /* Compiled regular expression; returned by
295                                   * a previous call to Tcl_GetRegExpFromObj */                                   * a previous call to Tcl_GetRegExpFromObj */
296      CONST Tcl_UniChar *wString; /* String against which to match re. */      CONST Tcl_UniChar *wString; /* String against which to match re. */
297      int numChars;               /* Length of Tcl_UniChar string (must      int numChars;               /* Length of Tcl_UniChar string (must
298                                   * be >= 0). */                                   * be >= 0). */
299      int nmatches;               /* How many subexpression matches (counting      int nmatches;               /* How many subexpression matches (counting
300                                   * the whole match as subexpression 0) are                                   * the whole match as subexpression 0) are
301                                   * of interest.  -1 means "don't know". */                                   * of interest.  -1 means "don't know". */
302      int flags;                  /* Regular expression flags. */      int flags;                  /* Regular expression flags. */
303  {  {
304      int status;      int status;
305      TclRegexp *regexpPtr = (TclRegexp *) re;      TclRegexp *regexpPtr = (TclRegexp *) re;
306      size_t last = regexpPtr->re.re_nsub + 1;      size_t last = regexpPtr->re.re_nsub + 1;
307      size_t nm = last;      size_t nm = last;
308    
309      if (nmatches >= 0 && (size_t) nmatches < nm) {      if (nmatches >= 0 && (size_t) nmatches < nm) {
310          nm = (size_t) nmatches;          nm = (size_t) nmatches;
311      }      }
312    
313      status = TclReExec(&regexpPtr->re, wString, (size_t) numChars,      status = TclReExec(&regexpPtr->re, wString, (size_t) numChars,
314              &regexpPtr->details, nm, regexpPtr->matches, flags);              &regexpPtr->details, nm, regexpPtr->matches, flags);
315    
316      /*      /*
317       * Check for errors.       * Check for errors.
318       */       */
319    
320      if (status != REG_OKAY) {      if (status != REG_OKAY) {
321          if (status == REG_NOMATCH) {          if (status == REG_NOMATCH) {
322              return 0;              return 0;
323          }          }
324          if (interp != NULL) {          if (interp != NULL) {
325              TclRegError(interp, "error while matching regular expression: ",              TclRegError(interp, "error while matching regular expression: ",
326                      status);                      status);
327          }          }
328          return -1;          return -1;
329      }      }
330      return 1;      return 1;
331  }  }
332    
333  /*  /*
334   *---------------------------------------------------------------------------   *---------------------------------------------------------------------------
335   *   *
336   * TclRegExpRangeUniChar --   * TclRegExpRangeUniChar --
337   *   *
338   *      Returns pointers describing the range of a regular expression match,   *      Returns pointers describing the range of a regular expression match,
339   *      or one of the subranges within the match, or the hypothetical range   *      or one of the subranges within the match, or the hypothetical range
340   *      represented by the rm_extend field of the rm_detail_t.   *      represented by the rm_extend field of the rm_detail_t.
341   *   *
342   * Results:   * Results:
343   *      The variables at *startPtr and *endPtr are modified to hold the   *      The variables at *startPtr and *endPtr are modified to hold the
344   *      offsets of the endpoints of the range given by index.  If the   *      offsets of the endpoints of the range given by index.  If the
345   *      specified range doesn't exist then -1s are supplied.   *      specified range doesn't exist then -1s are supplied.
346   *   *
347   * Side effects:   * Side effects:
348   *      None.   *      None.
349   *   *
350   *---------------------------------------------------------------------------   *---------------------------------------------------------------------------
351   */   */
352    
353  void  void
354  TclRegExpRangeUniChar(re, index, startPtr, endPtr)  TclRegExpRangeUniChar(re, index, startPtr, endPtr)
355      Tcl_RegExp re;              /* Compiled regular expression that has      Tcl_RegExp re;              /* Compiled regular expression that has
356                                   * been passed to Tcl_RegExpExec. */                                   * been passed to Tcl_RegExpExec. */
357      int index;                  /* 0 means give the range of the entire      int index;                  /* 0 means give the range of the entire
358                                   * match, > 0 means give the range of                                   * match, > 0 means give the range of
359                                   * a matching subrange, -1 means the                                   * a matching subrange, -1 means the
360                                   * range of the rm_extend field. */                                   * range of the rm_extend field. */
361      int *startPtr;              /* Store address of first character in      int *startPtr;              /* Store address of first character in
362                                   * (sub-) range here. */                                   * (sub-) range here. */
363      int *endPtr;                /* Store address of character just after last      int *endPtr;                /* Store address of character just after last
364                                   * in (sub-) range here. */                                   * in (sub-) range here. */
365  {  {
366      TclRegexp *regexpPtr = (TclRegexp *) re;      TclRegexp *regexpPtr = (TclRegexp *) re;
367    
368      if ((regexpPtr->flags&REG_EXPECT) && index == -1) {      if ((regexpPtr->flags&REG_EXPECT) && index == -1) {
369          *startPtr = regexpPtr->details.rm_extend.rm_so;          *startPtr = regexpPtr->details.rm_extend.rm_so;
370          *endPtr = regexpPtr->details.rm_extend.rm_eo;          *endPtr = regexpPtr->details.rm_extend.rm_eo;
371      } else if ((size_t) index > regexpPtr->re.re_nsub) {      } else if ((size_t) index > regexpPtr->re.re_nsub) {
372          *startPtr = -1;          *startPtr = -1;
373          *endPtr = -1;          *endPtr = -1;
374      } else {      } else {
375          *startPtr = regexpPtr->matches[index].rm_so;          *startPtr = regexpPtr->matches[index].rm_so;
376          *endPtr = regexpPtr->matches[index].rm_eo;          *endPtr = regexpPtr->matches[index].rm_eo;
377      }      }
378  }  }
379    
380  /*  /*
381   *----------------------------------------------------------------------   *----------------------------------------------------------------------
382   *   *
383   * Tcl_RegExpMatch --   * Tcl_RegExpMatch --
384   *   *
385   *      See if a string matches a regular expression.   *      See if a string matches a regular expression.
386   *   *
387   * Results:   * Results:
388   *      If an error occurs during the matching operation then -1   *      If an error occurs during the matching operation then -1
389   *      is returned and the interp's result contains an error message.   *      is returned and the interp's result contains an error message.
390   *      Otherwise the return value is 1 if "string" matches "pattern"   *      Otherwise the return value is 1 if "string" matches "pattern"
391   *      and 0 otherwise.   *      and 0 otherwise.
392   *   *
393   * Side effects:   * Side effects:
394   *      None.   *      None.
395   *   *
396   *----------------------------------------------------------------------   *----------------------------------------------------------------------
397   */   */
398    
399  int  int
400  Tcl_RegExpMatch(interp, string, pattern)  Tcl_RegExpMatch(interp, string, pattern)
401      Tcl_Interp *interp;         /* Used for error reporting. May be NULL. */      Tcl_Interp *interp;         /* Used for error reporting. May be NULL. */
402      char *string;               /* String. */      char *string;               /* String. */
403      char *pattern;              /* Regular expression to match against      char *pattern;              /* Regular expression to match against
404                                   * string. */                                   * string. */
405  {  {
406      Tcl_RegExp re;      Tcl_RegExp re;
407    
408      re = Tcl_RegExpCompile(interp, pattern);      re = Tcl_RegExpCompile(interp, pattern);
409      if (re == NULL) {      if (re == NULL) {
410          return -1;          return -1;
411      }      }
412      return Tcl_RegExpExec(interp, re, string, string);      return Tcl_RegExpExec(interp, re, string, string);
413  }  }
414    
415  /*  /*
416   *----------------------------------------------------------------------   *----------------------------------------------------------------------
417   *   *
418   * Tcl_RegExpExecObj --   * Tcl_RegExpExecObj --
419   *   *
420   *      Execute a precompiled regexp against the given object.   *      Execute a precompiled regexp against the given object.
421   *   *
422   * Results:   * Results:
423   *      If an error occurs during the matching operation then -1   *      If an error occurs during the matching operation then -1
424   *      is returned and the interp's result contains an error message.   *      is returned and the interp's result contains an error message.
425   *      Otherwise the return value is 1 if "string" matches "pattern"   *      Otherwise the return value is 1 if "string" matches "pattern"
426   *      and 0 otherwise.   *      and 0 otherwise.
427   *   *
428   * Side effects:   * Side effects:
429   *      Converts the object to a Unicode object.   *      Converts the object to a Unicode object.
430   *   *
431   *----------------------------------------------------------------------   *----------------------------------------------------------------------
432   */   */
433    
434  int  int
435  Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags)  Tcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags)
436      Tcl_Interp *interp;         /* Interpreter to use for error reporting. */      Tcl_Interp *interp;         /* Interpreter to use for error reporting. */
437      Tcl_RegExp re;              /* Compiled regular expression;  must have      Tcl_RegExp re;              /* Compiled regular expression;  must have
438                                   * been returned by previous call to                                   * been returned by previous call to
439                                   * Tcl_GetRegExpFromObj. */                                   * Tcl_GetRegExpFromObj. */
440      Tcl_Obj *objPtr;            /* String against which to match re. */      Tcl_Obj *objPtr;            /* String against which to match re. */
441      int offset;                 /* Character index that marks where matching      int offset;                 /* Character index that marks where matching
442                                   * should begin. */                                   * should begin. */
443      int nmatches;               /* How many subexpression matches (counting      int nmatches;               /* How many subexpression matches (counting
444                                   * the whole match as subexpression 0) are                                   * the whole match as subexpression 0) are
445                                   * of interest.  -1 means all of them. */                                   * of interest.  -1 means all of them. */
446      int flags;                  /* Regular expression execution flags. */      int flags;                  /* Regular expression execution flags. */
447  {  {
448      TclRegexp *regexpPtr = (TclRegexp *) re;      TclRegexp *regexpPtr = (TclRegexp *) re;
449      Tcl_UniChar *udata;      Tcl_UniChar *udata;
450      int length;      int length;
451    
452      /*      /*
453       * Save the target object so we can extract strings from it later.       * Save the target object so we can extract strings from it later.
454       */       */
455    
456      regexpPtr->string = NULL;      regexpPtr->string = NULL;
457      regexpPtr->objPtr = objPtr;      regexpPtr->objPtr = objPtr;
458    
459      udata = Tcl_GetUnicode(objPtr);      udata = Tcl_GetUnicode(objPtr);
460      length = Tcl_GetCharLength(objPtr);      length = Tcl_GetCharLength(objPtr);
461    
462      if (offset > length) {      if (offset > length) {
463          offset = length;          offset = length;
464      }      }
465      udata += offset;      udata += offset;
466      length -= offset;      length -= offset;
467            
468      return RegExpExecUniChar(interp, re, udata, length, nmatches, flags);      return RegExpExecUniChar(interp, re, udata, length, nmatches, flags);
469  }  }
470    
471  /*  /*
472   *----------------------------------------------------------------------   *----------------------------------------------------------------------
473   *   *
474   * Tcl_RegExpMatchObj --   * Tcl_RegExpMatchObj --
475   *   *
476   *      See if an object matches a regular expression.   *      See if an object matches a regular expression.
477   *   *
478   * Results:   * Results:
479   *      If an error occurs during the matching operation then -1   *      If an error occurs during the matching operation then -1
480   *      is returned and the interp's result contains an error message.   *      is returned and the interp's result contains an error message.
481   *      Otherwise the return value is 1 if "string" matches "pattern"   *      Otherwise the return value is 1 if "string" matches "pattern"
482   *      and 0 otherwise.   *      and 0 otherwise.
483   *   *
484   * Side effects:   * Side effects:
485   *      Changes the internal rep of the pattern and string objects.   *      Changes the internal rep of the pattern and string objects.
486   *   *
487   *----------------------------------------------------------------------   *----------------------------------------------------------------------
488   */   */
489    
490  int  int
491  Tcl_RegExpMatchObj(interp, stringObj, patternObj)  Tcl_RegExpMatchObj(interp, stringObj, patternObj)
492      Tcl_Interp *interp;         /* Used for error reporting. May be NULL. */      Tcl_Interp *interp;         /* Used for error reporting. May be NULL. */
493      Tcl_Obj *stringObj;         /* Object containing the String to search. */      Tcl_Obj *stringObj;         /* Object containing the String to search. */
494      Tcl_Obj *patternObj;        /* Regular expression to match against      Tcl_Obj *patternObj;        /* Regular expression to match against
495                                   * string. */                                   * string. */
496  {  {
497      Tcl_RegExp re;      Tcl_RegExp re;
498    
499      re = Tcl_GetRegExpFromObj(interp, patternObj,      re = Tcl_GetRegExpFromObj(interp, patternObj,
500              TCL_REG_ADVANCED | TCL_REG_NOSUB);              TCL_REG_ADVANCED | TCL_REG_NOSUB);
501      if (re == NULL) {      if (re == NULL) {
502          return -1;          return -1;
503      }      }
504      return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */,      return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */,
505              0 /* nmatches */, 0 /* flags */);              0 /* nmatches */, 0 /* flags */);
506  }  }
507    
508  /*  /*
509   *----------------------------------------------------------------------   *----------------------------------------------------------------------
510   *   *
511   * Tcl_RegExpGetInfo --   * Tcl_RegExpGetInfo --
512   *   *
513   *      Retrieve information about the current match.   *      Retrieve information about the current match.
514   *   *
515   * Results:   * Results:
516   *      None.   *      None.
517   *   *
518   * Side effects:   * Side effects:
519   *      None.   *      None.
520   *   *
521   *----------------------------------------------------------------------   *----------------------------------------------------------------------
522   */   */
523    
524  void  void
525  Tcl_RegExpGetInfo(regexp, infoPtr)  Tcl_RegExpGetInfo(regexp, infoPtr)
526      Tcl_RegExp regexp;          /* Pattern from which to get subexpressions. */      Tcl_RegExp regexp;          /* Pattern from which to get subexpressions. */
527      Tcl_RegExpInfo *infoPtr;    /* Match information is stored here.  */      Tcl_RegExpInfo *infoPtr;    /* Match information is stored here.  */
528  {  {
529      TclRegexp *regexpPtr = (TclRegexp *) regexp;      TclRegexp *regexpPtr = (TclRegexp *) regexp;
530    
531      infoPtr->nsubs = regexpPtr->re.re_nsub;      infoPtr->nsubs = regexpPtr->re.re_nsub;
532      infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;      infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches;
533      infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;      infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so;
534  }  }
535    
536  /*  /*
537   *----------------------------------------------------------------------   *----------------------------------------------------------------------
538   *   *
539   * Tcl_GetRegExpFromObj --   * Tcl_GetRegExpFromObj --
540   *   *
541   *      Compile a regular expression into a form suitable for fast   *      Compile a regular expression into a form suitable for fast
542   *      matching.  This procedure caches the result in a Tcl_Obj.   *      matching.  This procedure caches the result in a Tcl_Obj.
543   *   *
544   * Results:   * Results:
545   *      The return value is a pointer to the compiled form of string,   *      The return value is a pointer to the compiled form of string,
546   *      suitable for passing to Tcl_RegExpExec.  If an error occurred   *      suitable for passing to Tcl_RegExpExec.  If an error occurred
547   *      while compiling the pattern, then NULL is returned and an error   *      while compiling the pattern, then NULL is returned and an error
548   *      message is left in the interp's result.   *      message is left in the interp's result.
549   *   *
550   * Side effects:   * Side effects:
551   *      Updates the native rep of the Tcl_Obj.   *      Updates the native rep of the Tcl_Obj.
552   *   *
553   *----------------------------------------------------------------------   *----------------------------------------------------------------------
554   */   */
555    
556  Tcl_RegExp  Tcl_RegExp
557  Tcl_GetRegExpFromObj(interp, objPtr, flags)  Tcl_GetRegExpFromObj(interp, objPtr, flags)
558      Tcl_Interp *interp;         /* For use in error reporting, and to access      Tcl_Interp *interp;         /* For use in error reporting, and to access
559                                   * the interp regexp cache. */                                   * the interp regexp cache. */
560      Tcl_Obj *objPtr;            /* Object whose string rep contains regular      Tcl_Obj *objPtr;            /* Object whose string rep contains regular
561                                   * expression pattern.  Internal rep will be                                   * expression pattern.  Internal rep will be
562                                   * changed to compiled form of this regular                                   * changed to compiled form of this regular
563                                   * expression. */                                   * expression. */
564      int flags;                  /* Regular expression compilation flags. */      int flags;                  /* Regular expression compilation flags. */
565  {  {
566      int length;      int length;
567      Tcl_ObjType *typePtr;      Tcl_ObjType *typePtr;
568      TclRegexp *regexpPtr;      TclRegexp *regexpPtr;
569      char *pattern;      char *pattern;
570    
571      typePtr = objPtr->typePtr;      typePtr = objPtr->typePtr;
572      regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;      regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
573    
574      if ((typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) {      if ((typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) {
575          pattern = Tcl_GetStringFromObj(objPtr, &length);          pattern = Tcl_GetStringFromObj(objPtr, &length);
576    
577          regexpPtr = CompileRegexp(interp, pattern, length, flags);          regexpPtr = CompileRegexp(interp, pattern, length, flags);
578          if (regexpPtr == NULL) {          if (regexpPtr == NULL) {
579              return NULL;              return NULL;
580          }          }
581    
582          /*          /*
583           * Add a reference to the regexp so it will persist even if it is           * Add a reference to the regexp so it will persist even if it is
584           * pushed out of the current thread's regexp cache.  This reference           * pushed out of the current thread's regexp cache.  This reference
585           * will be removed when the object's internal rep is freed.           * will be removed when the object's internal rep is freed.
586           */           */
587    
588          regexpPtr->refCount++;          regexpPtr->refCount++;
589    
590          /*          /*
591           * Free the old representation and set our type.           * Free the old representation and set our type.
592           */           */
593    
594          if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) {          if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) {
595              (*typePtr->freeIntRepProc)(objPtr);              (*typePtr->freeIntRepProc)(objPtr);
596          }          }
597          objPtr->internalRep.otherValuePtr = (VOID *) regexpPtr;          objPtr->internalRep.otherValuePtr = (VOID *) regexpPtr;
598          objPtr->typePtr = &tclRegexpType;          objPtr->typePtr = &tclRegexpType;
599      }      }
600      return (Tcl_RegExp) regexpPtr;      return (Tcl_RegExp) regexpPtr;
601  }  }
602    
603  /*  /*
604   *----------------------------------------------------------------------   *----------------------------------------------------------------------
605   *   *
606   * TclRegAbout --   * TclRegAbout --
607   *   *
608   *      Return information about a compiled regular expression.   *      Return information about a compiled regular expression.
609   *   *
610   * Results:   * Results:
611   *      The return value is -1 for failure, 0 for success, although at   *      The return value is -1 for failure, 0 for success, although at
612   *      the moment there's nothing that could fail.  On success, a list   *      the moment there's nothing that could fail.  On success, a list
613   *      is left in the interp's result:  first element is the subexpression   *      is left in the interp's result:  first element is the subexpression
614   *      count, second is a list of re_info bit names.   *      count, second is a list of re_info bit names.
615   *   *
616   * Side effects:   * Side effects:
617   *      None.   *      None.
618   *   *
619   *----------------------------------------------------------------------   *----------------------------------------------------------------------
620   */   */
621    
622  int  int
623  TclRegAbout(interp, re)  TclRegAbout(interp, re)
624      Tcl_Interp *interp;         /* For use in variable assignment. */      Tcl_Interp *interp;         /* For use in variable assignment. */
625      Tcl_RegExp re;              /* The compiled regular expression. */      Tcl_RegExp re;              /* The compiled regular expression. */
626  {  {
627      TclRegexp *regexpPtr = (TclRegexp *)re;      TclRegexp *regexpPtr = (TclRegexp *)re;
628      char buf[TCL_INTEGER_SPACE];      char buf[TCL_INTEGER_SPACE];
629      static struct infoname {      static struct infoname {
630          int bit;          int bit;
631          char *text;          char *text;
632      } infonames[] = {      } infonames[] = {
633          {REG_UBACKREF,          "REG_UBACKREF"},          {REG_UBACKREF,          "REG_UBACKREF"},
634          {REG_ULOOKAHEAD,        "REG_ULOOKAHEAD"},          {REG_ULOOKAHEAD,        "REG_ULOOKAHEAD"},
635          {REG_UBOUNDS,           "REG_UBOUNDS"},          {REG_UBOUNDS,           "REG_UBOUNDS"},
636          {REG_UBRACES,           "REG_UBRACES"},          {REG_UBRACES,           "REG_UBRACES"},
637          {REG_UBSALNUM,          "REG_UBSALNUM"},          {REG_UBSALNUM,          "REG_UBSALNUM"},
638          {REG_UPBOTCH,           "REG_UPBOTCH"},          {REG_UPBOTCH,           "REG_UPBOTCH"},
639          {REG_UBBS,              "REG_UBBS"},          {REG_UBBS,              "REG_UBBS"},
640          {REG_UNONPOSIX,         "REG_UNONPOSIX"},          {REG_UNONPOSIX,         "REG_UNONPOSIX"},
641          {REG_UUNSPEC,           "REG_UUNSPEC"},          {REG_UUNSPEC,           "REG_UUNSPEC"},
642          {REG_UUNPORT,           "REG_UUNPORT"},          {REG_UUNPORT,           "REG_UUNPORT"},
643          {REG_ULOCALE,           "REG_ULOCALE"},          {REG_ULOCALE,           "REG_ULOCALE"},
644          {REG_UEMPTYMATCH,       "REG_UEMPTYMATCH"},          {REG_UEMPTYMATCH,       "REG_UEMPTYMATCH"},
645          {REG_UIMPOSSIBLE,       "REG_UIMPOSSIBLE"},          {REG_UIMPOSSIBLE,       "REG_UIMPOSSIBLE"},
646          {REG_USHORTEST,         "REG_USHORTEST"},          {REG_USHORTEST,         "REG_USHORTEST"},
647          {0,                     ""}          {0,                     ""}
648      };      };
649      struct infoname *inf;      struct infoname *inf;
650      int n;      int n;
651    
652      Tcl_ResetResult(interp);      Tcl_ResetResult(interp);
653    
654      sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub));      sprintf(buf, "%u", (unsigned)(regexpPtr->re.re_nsub));
655      Tcl_AppendElement(interp, buf);      Tcl_AppendElement(interp, buf);
656    
657      /*      /*
658       * Must count bits before generating list, because we must know       * Must count bits before generating list, because we must know
659       * whether {} are needed before we start appending names.       * whether {} are needed before we start appending names.
660       */       */
661      n = 0;      n = 0;
662      for (inf = infonames; inf->bit != 0; inf++) {      for (inf = infonames; inf->bit != 0; inf++) {
663          if (regexpPtr->re.re_info&inf->bit) {          if (regexpPtr->re.re_info&inf->bit) {
664              n++;              n++;
665          }          }
666      }      }
667      if (n != 1) {      if (n != 1) {
668          Tcl_AppendResult(interp, " {", NULL);          Tcl_AppendResult(interp, " {", NULL);
669      }      }
670      for (inf = infonames; inf->bit != 0; inf++) {      for (inf = infonames; inf->bit != 0; inf++) {
671          if (regexpPtr->re.re_info&inf->bit) {          if (regexpPtr->re.re_info&inf->bit) {
672              Tcl_AppendElement(interp, inf->text);              Tcl_AppendElement(interp, inf->text);
673          }          }
674      }      }
675      if (n != 1) {      if (n != 1) {
676          Tcl_AppendResult(interp, "}", NULL);          Tcl_AppendResult(interp, "}", NULL);
677      }      }
678    
679      return 0;      return 0;
680  }  }
681    
682  /*  /*
683   *----------------------------------------------------------------------   *----------------------------------------------------------------------
684   *   *
685   * TclRegError --   * TclRegError --
686   *   *
687   *      Generate an error message based on the regexp status code.   *      Generate an error message based on the regexp status code.
688   *   *
689   * Results:   * Results:
690   *      Places an error in the interpreter.   *      Places an error in the interpreter.
691   *   *
692   * Side effects:   * Side effects:
693   *      Sets errorCode as well.   *      Sets errorCode as well.
694   *   *
695   *----------------------------------------------------------------------   *----------------------------------------------------------------------
696   */   */
697    
698  void  void
699  TclRegError(interp, msg, status)  TclRegError(interp, msg, status)
700      Tcl_Interp *interp;         /* Interpreter for error reporting. */      Tcl_Interp *interp;         /* Interpreter for error reporting. */
701      char *msg;                  /* Message to prepend to error. */      char *msg;                  /* Message to prepend to error. */
702      int status;                 /* Status code to report. */      int status;                 /* Status code to report. */
703  {  {
704      char buf[100];              /* ample in practice */      char buf[100];              /* ample in practice */
705      char cbuf[100];             /* lots in practice */      char cbuf[100];             /* lots in practice */
706      size_t n;      size_t n;
707      char *p;      char *p;
708    
709      Tcl_ResetResult(interp);      Tcl_ResetResult(interp);
710      n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf));      n = TclReError(status, (regex_t *)NULL, buf, sizeof(buf));
711      p = (n > sizeof(buf)) ? "..." : "";      p = (n > sizeof(buf)) ? "..." : "";
712      Tcl_AppendResult(interp, msg, buf, p, NULL);      Tcl_AppendResult(interp, msg, buf, p, NULL);
713    
714      sprintf(cbuf, "%d", status);      sprintf(cbuf, "%d", status);
715      (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));      (VOID) TclReError(REG_ITOA, (regex_t *)NULL, cbuf, sizeof(cbuf));
716      Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);      Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL);
717  }  }
718    
719    
720  /*  /*
721   *----------------------------------------------------------------------   *----------------------------------------------------------------------
722   *   *
723   * FreeRegexpInternalRep --   * FreeRegexpInternalRep --
724   *   *
725   *      Deallocate the storage associated with a regexp object's internal   *      Deallocate the storage associated with a regexp object's internal
726   *      representation.   *      representation.
727   *   *
728   * Results:   * Results:
729   *      None.   *      None.
730   *   *
731   * Side effects:   * Side effects:
732   *      Frees the compiled regular expression.   *      Frees the compiled regular expression.
733   *   *
734   *----------------------------------------------------------------------   *----------------------------------------------------------------------
735   */   */
736    
737  static void  static void
738  FreeRegexpInternalRep(objPtr)  FreeRegexpInternalRep(objPtr)
739      Tcl_Obj *objPtr;            /* Regexp object with internal rep to free. */      Tcl_Obj *objPtr;            /* Regexp object with internal rep to free. */
740  {  {
741      TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;      TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr;
742    
743      /*      /*
744       * If this is the last reference to the regexp, free it.       * If this is the last reference to the regexp, free it.
745       */       */
746    
747      if (--(regexpRepPtr->refCount) <= 0) {      if (--(regexpRepPtr->refCount) <= 0) {
748          FreeRegexp(regexpRepPtr);          FreeRegexp(regexpRepPtr);
749      }      }
750  }  }
751    
752  /*  /*
753   *----------------------------------------------------------------------   *----------------------------------------------------------------------
754   *   *
755   * DupRegexpInternalRep --   * DupRegexpInternalRep --
756   *   *
757   *      We copy the reference to the compiled regexp and bump its   *      We copy the reference to the compiled regexp and bump its
758   *      reference count.   *      reference count.
759   *   *
760   * Results:   * Results:
761   *      None.   *      None.
762   *   *
763   * Side effects:   * Side effects:
764   *      Increments the reference count of the regexp.   *      Increments the reference count of the regexp.
765   *   *
766   *----------------------------------------------------------------------   *----------------------------------------------------------------------
767   */   */
768    
769  static void  static void
770  DupRegexpInternalRep(srcPtr, copyPtr)  DupRegexpInternalRep(srcPtr, copyPtr)
771      Tcl_Obj *srcPtr;            /* Object with internal rep to copy. */      Tcl_Obj *srcPtr;            /* Object with internal rep to copy. */
772      Tcl_Obj *copyPtr;           /* Object with internal rep to set. */      Tcl_Obj *copyPtr;           /* Object with internal rep to set. */
773  {  {
774      TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr;      TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr;
775      regexpPtr->refCount++;      regexpPtr->refCount++;
776      copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr;      copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr;
777      copyPtr->typePtr = &tclRegexpType;      copyPtr->typePtr = &tclRegexpType;
778  }  }
779    
780  /*  /*
781   *----------------------------------------------------------------------   *----------------------------------------------------------------------
782   *   *
783   * SetRegexpFromAny --   * SetRegexpFromAny --
784   *   *
785   *      Attempt to generate a compiled regular expression for the Tcl object   *      Attempt to generate a compiled regular expression for the Tcl object
786   *      "objPtr".   *      "objPtr".
787   *   *
788   * Results:   * Results:
789   *      The return value is TCL_OK or TCL_ERROR. If an error occurs during   *      The return value is TCL_OK or TCL_ERROR. If an error occurs during
790   *      conversion, an error message is left in the interpreter's result   *      conversion, an error message is left in the interpreter's result
791   *      unless "interp" is NULL.   *      unless "interp" is NULL.
792   *   *
793   * Side effects:   * Side effects:
794   *      If no error occurs, a regular expression is stored as "objPtr"s   *      If no error occurs, a regular expression is stored as "objPtr"s
795   *      internal representation.   *      internal representation.
796   *   *
797   *----------------------------------------------------------------------   *----------------------------------------------------------------------
798   */   */
799    
800  static int  static int
801  SetRegexpFromAny(interp, objPtr)  SetRegexpFromAny(interp, objPtr)
802      Tcl_Interp *interp;         /* Used for error reporting if not NULL. */      Tcl_Interp *interp;         /* Used for error reporting if not NULL. */
803      Tcl_Obj *objPtr;            /* The object to convert. */      Tcl_Obj *objPtr;            /* The object to convert. */
804  {  {
805      if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) {      if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) {
806          return TCL_ERROR;          return TCL_ERROR;
807      }      }
808      return TCL_OK;      return TCL_OK;
809  }  }
810    
811  /*  /*
812   *---------------------------------------------------------------------------   *---------------------------------------------------------------------------
813   *   *
814   * CompileRegexp --   * CompileRegexp --
815   *   *
816   *      Attempt to compile the given regexp pattern.  If the compiled   *      Attempt to compile the given regexp pattern.  If the compiled
817   *      regular expression can be found in the per-thread cache, it   *      regular expression can be found in the per-thread cache, it
818   *      will be used instead of compiling a new copy.   *      will be used instead of compiling a new copy.
819   *   *
820   * Results:   * Results:
821   *      The return value is a pointer to a newly allocated TclRegexp   *      The return value is a pointer to a newly allocated TclRegexp
822   *      that represents the compiled pattern, or NULL if the pattern   *      that represents the compiled pattern, or NULL if the pattern
823   *      could not be compiled.  If NULL is returned, an error message is   *      could not be compiled.  If NULL is returned, an error message is
824   *      left in the interp's result.   *      left in the interp's result.
825   *   *
826   * Side effects:   * Side effects:
827   *      The thread-local regexp cache is updated and a new TclRegexp may   *      The thread-local regexp cache is updated and a new TclRegexp may
828   *      be allocated.   *      be allocated.
829   *   *
830   *----------------------------------------------------------------------   *----------------------------------------------------------------------
831   */   */
832    
833  static TclRegexp *  static TclRegexp *
834  CompileRegexp(interp, string, length, flags)  CompileRegexp(interp, string, length, flags)
835      Tcl_Interp *interp;         /* Used for error reporting if not NULL. */      Tcl_Interp *interp;         /* Used for error reporting if not NULL. */
836      char *string;               /* The regexp to compile (UTF-8). */      char *string;               /* The regexp to compile (UTF-8). */
837      int length;                 /* The length of the string in bytes. */      int length;                 /* The length of the string in bytes. */
838      int flags;                  /* Compilation flags. */      int flags;                  /* Compilation flags. */
839  {  {
840      TclRegexp *regexpPtr;      TclRegexp *regexpPtr;
841      Tcl_UniChar *uniString;      Tcl_UniChar *uniString;
842      int numChars;      int numChars;
843      Tcl_DString stringBuf;      Tcl_DString stringBuf;
844      int status, i;      int status, i;
845      ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);      ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
846    
847      if (!tsdPtr->initialized) {      if (!tsdPtr->initialized) {
848          tsdPtr->initialized = 1;          tsdPtr->initialized = 1;
849          Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL);          Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL);
850      }      }
851    
852      /*      /*
853       * This routine maintains a second-level regular expression cache in       * This routine maintains a second-level regular expression cache in
854       * addition to the per-object regexp cache.  The per-thread cache is needed       * addition to the per-object regexp cache.  The per-thread cache is needed
855       * to handle the case where for various reasons the object is lost between       * to handle the case where for various reasons the object is lost between
856       * invocations of the regexp command, but the literal pattern is the same.       * invocations of the regexp command, but the literal pattern is the same.
857       */       */
858    
859      /*      /*
860       * Check the per-thread compiled regexp cache.  We can only reuse       * Check the per-thread compiled regexp cache.  We can only reuse
861       * a regexp if it has the same pattern and the same flags.       * a regexp if it has the same pattern and the same flags.
862       */       */
863    
864      for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {      for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
865          if ((length == tsdPtr->patLengths[i])          if ((length == tsdPtr->patLengths[i])
866                  && (tsdPtr->regexps[i]->flags == flags)                  && (tsdPtr->regexps[i]->flags == flags)
867                  && (strcmp(string, tsdPtr->patterns[i]) == 0)) {                  && (strcmp(string, tsdPtr->patterns[i]) == 0)) {
868              /*              /*
869               * Move the matched pattern to the first slot in the               * Move the matched pattern to the first slot in the
870               * cache and shift the other patterns down one position.               * cache and shift the other patterns down one position.
871               */               */
872    
873              if (i != 0) {              if (i != 0) {
874                  int j;                  int j;
875                  char *cachedString;                  char *cachedString;
876    
877                  cachedString = tsdPtr->patterns[i];                  cachedString = tsdPtr->patterns[i];
878                  regexpPtr = tsdPtr->regexps[i];                  regexpPtr = tsdPtr->regexps[i];
879                  for (j = i-1; j >= 0; j--) {                  for (j = i-1; j >= 0; j--) {
880                      tsdPtr->patterns[j+1] = tsdPtr->patterns[j];                      tsdPtr->patterns[j+1] = tsdPtr->patterns[j];
881                      tsdPtr->patLengths[j+1] = tsdPtr->patLengths[j];                      tsdPtr->patLengths[j+1] = tsdPtr->patLengths[j];
882                      tsdPtr->regexps[j+1] = tsdPtr->regexps[j];                      tsdPtr->regexps[j+1] = tsdPtr->regexps[j];
883                  }                  }
884                  tsdPtr->patterns[0] = cachedString;                  tsdPtr->patterns[0] = cachedString;
885                  tsdPtr->patLengths[0] = length;                  tsdPtr->patLengths[0] = length;
886                  tsdPtr->regexps[0] = regexpPtr;                  tsdPtr->regexps[0] = regexpPtr;
887              }              }
888              return tsdPtr->regexps[0];              return tsdPtr->regexps[0];
889          }          }
890      }      }
891    
892      /*      /*
893       * This is a new expression, so compile it and add it to the cache.       * This is a new expression, so compile it and add it to the cache.
894       */       */
895            
896      regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));      regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp));
897      regexpPtr->objPtr = NULL;      regexpPtr->objPtr = NULL;
898      regexpPtr->string = NULL;      regexpPtr->string = NULL;
899      regexpPtr->details.rm_extend.rm_so = -1;      regexpPtr->details.rm_extend.rm_so = -1;
900      regexpPtr->details.rm_extend.rm_eo = -1;      regexpPtr->details.rm_extend.rm_eo = -1;
901    
902      /*      /*
903       * Get the up-to-date string representation and map to unicode.       * Get the up-to-date string representation and map to unicode.
904       */       */
905    
906      Tcl_DStringInit(&stringBuf);      Tcl_DStringInit(&stringBuf);
907      uniString = Tcl_UtfToUniCharDString(string, length, &stringBuf);      uniString = Tcl_UtfToUniCharDString(string, length, &stringBuf);
908      numChars = Tcl_DStringLength(&stringBuf) / sizeof(Tcl_UniChar);      numChars = Tcl_DStringLength(&stringBuf) / sizeof(Tcl_UniChar);
909    
910      /*      /*
911       * Compile the string and check for errors.       * Compile the string and check for errors.
912       */       */
913    
914      regexpPtr->flags = flags;      regexpPtr->flags = flags;
915      status = TclReComp(&regexpPtr->re, uniString, (size_t) numChars, flags);      status = TclReComp(&regexpPtr->re, uniString, (size_t) numChars, flags);
916      Tcl_DStringFree(&stringBuf);      Tcl_DStringFree(&stringBuf);
917    
918      if (status != REG_OKAY) {      if (status != REG_OKAY) {
919          /*          /*
920           * Clean up and report errors in the interpreter, if possible.           * Clean up and report errors in the interpreter, if possible.
921           */           */
922    
923          ckfree((char *)regexpPtr);          ckfree((char *)regexpPtr);
924          if (interp) {          if (interp) {
925              TclRegError(interp,              TclRegError(interp,
926                      "couldn't compile regular expression pattern: ",                      "couldn't compile regular expression pattern: ",
927                      status);                      status);
928          }          }
929          return NULL;          return NULL;
930      }      }
931    
932      /*      /*
933       * Allocate enough space for all of the subexpressions, plus one       * Allocate enough space for all of the subexpressions, plus one
934       * extra for the entire pattern.       * extra for the entire pattern.
935       */       */
936    
937      regexpPtr->matches = (regmatch_t *) ckalloc(      regexpPtr->matches = (regmatch_t *) ckalloc(
938              sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));              sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1));
939    
940      /*      /*
941       * Initialize the refcount to one initially, since it is in the cache.       * Initialize the refcount to one initially, since it is in the cache.
942       */       */
943    
944      regexpPtr->refCount = 1;      regexpPtr->refCount = 1;
945    
946      /*      /*
947       * Free the last regexp, if necessary, and make room at the head of the       * Free the last regexp, if necessary, and make room at the head of the
948       * list for the new regexp.       * list for the new regexp.
949       */       */
950    
951      if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) {      if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) {
952          TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1];          TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1];
953          if (--(oldRegexpPtr->refCount) <= 0) {          if (--(oldRegexpPtr->refCount) <= 0) {
954              FreeRegexp(oldRegexpPtr);              FreeRegexp(oldRegexpPtr);
955          }          }
956          ckfree(tsdPtr->patterns[NUM_REGEXPS-1]);          ckfree(tsdPtr->patterns[NUM_REGEXPS-1]);
957      }      }
958      for (i = NUM_REGEXPS - 2; i >= 0; i--) {      for (i = NUM_REGEXPS - 2; i >= 0; i--) {
959          tsdPtr->patterns[i+1] = tsdPtr->patterns[i];          tsdPtr->patterns[i+1] = tsdPtr->patterns[i];
960          tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i];          tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i];
961          tsdPtr->regexps[i+1] = tsdPtr->regexps[i];          tsdPtr->regexps[i+1] = tsdPtr->regexps[i];
962      }      }
963      tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1));      tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1));
964      strcpy(tsdPtr->patterns[0], string);      strcpy(tsdPtr->patterns[0], string);
965      tsdPtr->patLengths[0] = length;      tsdPtr->patLengths[0] = length;
966      tsdPtr->regexps[0] = regexpPtr;      tsdPtr->regexps[0] = regexpPtr;
967    
968      return regexpPtr;      return regexpPtr;
969  }  }
970    
971  /*  /*
972   *----------------------------------------------------------------------   *----------------------------------------------------------------------
973   *   *
974   * FreeRegexp --   * FreeRegexp --
975   *   *
976   *      Release the storage associated with a TclRegexp.   *      Release the storage associated with a TclRegexp.
977   *   *
978   * Results:   * Results:
979   *      None.   *      None.
980   *   *
981   * Side effects:   * Side effects:
982   *      None.   *      None.
983   *   *
984   *----------------------------------------------------------------------   *----------------------------------------------------------------------
985   */   */
986    
987  static void  static void
988  FreeRegexp(regexpPtr)  FreeRegexp(regexpPtr)
989      TclRegexp *regexpPtr;       /* Compiled regular expression to free. */      TclRegexp *regexpPtr;       /* Compiled regular expression to free. */
990  {  {
991      TclReFree(&regexpPtr->re);      TclReFree(&regexpPtr->re);
992      if (regexpPtr->matches) {      if (regexpPtr->matches) {
993          ckfree((char *) regexpPtr->matches);          ckfree((char *) regexpPtr->matches);
994      }      }
995      ckfree((char *) regexpPtr);      ckfree((char *) regexpPtr);
996  }  }
997    
998  /*  /*
999   *----------------------------------------------------------------------   *----------------------------------------------------------------------
1000   *   *
1001   * FinalizeRegexp --   * FinalizeRegexp --
1002   *   *
1003   *      Release the storage associated with the per-thread regexp   *      Release the storage associated with the per-thread regexp
1004   *      cache.   *      cache.
1005   *   *
1006   * Results:   * Results:
1007   *      None.   *      None.
1008   *   *
1009   * Side effects:   * Side effects:
1010   *      None.   *      None.
1011   *   *
1012   *----------------------------------------------------------------------   *----------------------------------------------------------------------
1013   */   */
1014    
1015  static void  static void
1016  FinalizeRegexp(clientData)  FinalizeRegexp(clientData)
1017      ClientData clientData;      /* Not used. */      ClientData clientData;      /* Not used. */
1018  {  {
1019      int i;      int i;
1020      TclRegexp *regexpPtr;      TclRegexp *regexpPtr;
1021      ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);      ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
1022    
1023      for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {      for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) {
1024          regexpPtr = tsdPtr->regexps[i];          regexpPtr = tsdPtr->regexps[i];
1025          if (--(regexpPtr->refCount) <= 0) {          if (--(regexpPtr->refCount) <= 0) {
1026              FreeRegexp(regexpPtr);              FreeRegexp(regexpPtr);
1027          }          }
1028          ckfree(tsdPtr->patterns[i]);          ckfree(tsdPtr->patterns[i]);
1029      }      }
1030  }  }
1031    
1032  /* End of tclregexp.c */  /* End of tclregexp.c */

Legend:
Removed from v.67  
changed lines
  Added in v.71

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25