1 |
/* $Header$ */ |
2 |
/* |
3 |
* tclEncoding.c -- |
4 |
* |
5 |
* Contains the implementation of the encoding conversion package. |
6 |
* |
7 |
* Copyright (c) 1996-1998 Sun Microsystems, Inc. |
8 |
* |
9 |
* See the file "license.terms" for information on usage and redistribution |
10 |
* of this file, and for a DISCLAIMER OF ALL WARRANTIES. |
11 |
* |
12 |
* RCS: @(#) $Id: tclencoding.c,v 1.1.1.1 2001/06/13 04:37:49 dtashley Exp $ |
13 |
*/ |
14 |
|
15 |
#include "tclInt.h" |
16 |
#include "tclPort.h" |
17 |
|
18 |
typedef size_t (LengthProc)_ANSI_ARGS_((CONST char *src)); |
19 |
|
20 |
/* |
21 |
* The following data structure represents an encoding, which describes how |
22 |
* to convert between various character sets and UTF-8. |
23 |
*/ |
24 |
|
25 |
typedef struct Encoding { |
26 |
char *name; /* Name of encoding. Malloced because (1) |
27 |
* hash table entry that owns this encoding |
28 |
* may be freed prior to this encoding being |
29 |
* freed, (2) string passed in the |
30 |
* Tcl_EncodingType structure may not be |
31 |
* persistent. */ |
32 |
Tcl_EncodingConvertProc *toUtfProc; |
33 |
/* Procedure to convert from external |
34 |
* encoding into UTF-8. */ |
35 |
Tcl_EncodingConvertProc *fromUtfProc; |
36 |
/* Procedure to convert from UTF-8 into |
37 |
* external encoding. */ |
38 |
Tcl_EncodingFreeProc *freeProc; |
39 |
/* If non-NULL, procedure to call when this |
40 |
* encoding is deleted. */ |
41 |
int nullSize; /* Number of 0x00 bytes that signify |
42 |
* end-of-string in this encoding. This |
43 |
* number is used to determine the source |
44 |
* string length when the srcLen argument is |
45 |
* negative. This number can be 1 or 2. */ |
46 |
ClientData clientData; /* Arbitrary value associated with encoding |
47 |
* type. Passed to conversion procedures. */ |
48 |
LengthProc *lengthProc; /* Function to compute length of |
49 |
* null-terminated strings in this encoding. |
50 |
* If nullSize is 1, this is strlen; if |
51 |
* nullSize is 2, this is a function that |
52 |
* returns the number of bytes in a 0x0000 |
53 |
* terminated string. */ |
54 |
int refCount; /* Number of uses of this structure. */ |
55 |
Tcl_HashEntry *hPtr; /* Hash table entry that owns this encoding. */ |
56 |
} Encoding; |
57 |
|
58 |
/* |
59 |
* The following structure is the clientData for a dynamically-loaded, |
60 |
* table-driven encoding created by LoadTableEncoding(). It maps between |
61 |
* Unicode and a single-byte, double-byte, or multibyte (1 or 2 bytes only) |
62 |
* encoding. |
63 |
*/ |
64 |
|
65 |
typedef struct TableEncodingData { |
66 |
int fallback; /* Character (in this encoding) to |
67 |
* substitute when this encoding cannot |
68 |
* represent a UTF-8 character. */ |
69 |
char prefixBytes[256]; /* If a byte in the input stream is a lead |
70 |
* byte for a 2-byte sequence, the |
71 |
* corresponding entry in this array is 1, |
72 |
* otherwise it is 0. */ |
73 |
unsigned short **toUnicode; /* Two dimensional sparse matrix to map |
74 |
* characters from the encoding to Unicode. |
75 |
* Each element of the toUnicode array points |
76 |
* to an array of 256 shorts. If there is no |
77 |
* corresponding character in Unicode, the |
78 |
* value in the matrix is 0x0000. malloc'd. */ |
79 |
unsigned short **fromUnicode; |
80 |
/* Two dimensional sparse matrix to map |
81 |
* characters from Unicode to the encoding. |
82 |
* Each element of the fromUnicode array |
83 |
* points to an array of 256 shorts. If there |
84 |
* is no corresponding character the encoding, |
85 |
* the value in the matrix is 0x0000. |
86 |
* malloc'd. */ |
87 |
} TableEncodingData; |
88 |
|
89 |
/* |
90 |
* The following structures is the clientData for a dynamically-loaded, |
91 |
* escape-driven encoding that is itself comprised of other simpler |
92 |
* encodings. An example is "iso-2022-jp", which uses escape sequences to |
93 |
* switch between ascii, jis0208, jis0212, gb2312, and ksc5601. Note that |
94 |
* "escape-driven" does not necessarily mean that the ESCAPE character is |
95 |
* the character used for switching character sets. |
96 |
*/ |
97 |
|
98 |
typedef struct EscapeSubTable { |
99 |
unsigned int sequenceLen; /* Length of following string. */ |
100 |
char sequence[16]; /* Escape code that marks this encoding. */ |
101 |
char name[32]; /* Name for encoding. */ |
102 |
Encoding *encodingPtr; /* Encoding loaded using above name, or NULL |
103 |
* if this sub-encoding has not been needed |
104 |
* yet. */ |
105 |
} EscapeSubTable; |
106 |
|
107 |
typedef struct EscapeEncodingData { |
108 |
int fallback; /* Character (in this encoding) to |
109 |
* substitute when this encoding cannot |
110 |
* represent a UTF-8 character. */ |
111 |
unsigned int initLen; /* Length of following string. */ |
112 |
char init[16]; /* String to emit or expect before first char |
113 |
* in conversion. */ |
114 |
unsigned int finalLen; /* Length of following string. */ |
115 |
char final[16]; /* String to emit or expect after last char |
116 |
* in conversion. */ |
117 |
char prefixBytes[256]; /* If a byte in the input stream is the |
118 |
* first character of one of the escape |
119 |
* sequences in the following array, the |
120 |
* corresponding entry in this array is 1, |
121 |
* otherwise it is 0. */ |
122 |
int numSubTables; /* Length of following array. */ |
123 |
EscapeSubTable subTables[1];/* Information about each EscapeSubTable |
124 |
* used by this encoding type. The actual |
125 |
* size will be as large as necessary to |
126 |
* hold all EscapeSubTables. */ |
127 |
} EscapeEncodingData; |
128 |
|
129 |
/* |
130 |
* Constants used when loading an encoding file to identify the type of the |
131 |
* file. |
132 |
*/ |
133 |
|
134 |
#define ENCODING_SINGLEBYTE 0 |
135 |
#define ENCODING_DOUBLEBYTE 1 |
136 |
#define ENCODING_MULTIBYTE 2 |
137 |
#define ENCODING_ESCAPE 3 |
138 |
|
139 |
/* |
140 |
* Initialize the default encoding directory. If this variable contains |
141 |
* a non NULL value, it will be the first path used to locate the |
142 |
* system encoding files. |
143 |
*/ |
144 |
|
145 |
char *tclDefaultEncodingDir = NULL; |
146 |
|
147 |
static int encodingsInitialized = 0; |
148 |
|
149 |
/* |
150 |
* Hash table that keeps track of all loaded Encodings. Keys are |
151 |
* the string names that represent the encoding, values are (Encoding *). |
152 |
*/ |
153 |
|
154 |
static Tcl_HashTable encodingTable; |
155 |
TCL_DECLARE_MUTEX(encodingMutex) |
156 |
|
157 |
/* |
158 |
* The following are used to hold the default and current system encodings. |
159 |
* If NULL is passed to one of the conversion routines, the current setting |
160 |
* of the system encoding will be used to perform the conversion. |
161 |
*/ |
162 |
|
163 |
static Tcl_Encoding defaultEncoding; |
164 |
static Tcl_Encoding systemEncoding; |
165 |
|
166 |
/* |
167 |
* The following variable is used in the sparse matrix code for a |
168 |
* TableEncoding to represent a page in the table that has no entries. |
169 |
*/ |
170 |
|
171 |
static unsigned short emptyPage[256]; |
172 |
|
173 |
/* |
174 |
* Procedures used only in this module. |
175 |
*/ |
176 |
|
177 |
static int BinaryProc _ANSI_ARGS_((ClientData clientData, |
178 |
CONST char *src, int srcLen, int flags, |
179 |
Tcl_EncodingState *statePtr, char *dst, int dstLen, |
180 |
int *srcReadPtr, int *dstWrotePtr, |
181 |
int *dstCharsPtr)); |
182 |
static void EscapeFreeProc _ANSI_ARGS_((ClientData clientData)); |
183 |
static int EscapeFromUtfProc _ANSI_ARGS_((ClientData clientData, |
184 |
CONST char *src, int srcLen, int flags, |
185 |
Tcl_EncodingState *statePtr, char *dst, int dstLen, |
186 |
int *srcReadPtr, int *dstWrotePtr, |
187 |
int *dstCharsPtr)); |
188 |
static int EscapeToUtfProc _ANSI_ARGS_((ClientData clientData, |
189 |
CONST char *src, int srcLen, int flags, |
190 |
Tcl_EncodingState *statePtr, char *dst, int dstLen, |
191 |
int *srcReadPtr, int *dstWrotePtr, |
192 |
int *dstCharsPtr)); |
193 |
static void FreeEncoding _ANSI_ARGS_((Tcl_Encoding encoding)); |
194 |
static Encoding * GetTableEncoding _ANSI_ARGS_(( |
195 |
EscapeEncodingData *dataPtr, int state)); |
196 |
static Tcl_Encoding LoadEncodingFile _ANSI_ARGS_((Tcl_Interp *interp, |
197 |
CONST char *name)); |
198 |
static Tcl_Encoding LoadTableEncoding _ANSI_ARGS_((Tcl_Interp *interp, |
199 |
CONST char *name, int type, Tcl_Channel chan)); |
200 |
static Tcl_Encoding LoadEscapeEncoding _ANSI_ARGS_((CONST char *name, |
201 |
Tcl_Channel chan)); |
202 |
static Tcl_Channel OpenEncodingFile _ANSI_ARGS_((CONST char *dir, |
203 |
CONST char *name)); |
204 |
static void TableFreeProc _ANSI_ARGS_((ClientData clientData)); |
205 |
static int TableFromUtfProc _ANSI_ARGS_((ClientData clientData, |
206 |
CONST char *src, int srcLen, int flags, |
207 |
Tcl_EncodingState *statePtr, char *dst, int dstLen, |
208 |
int *srcReadPtr, int *dstWrotePtr, |
209 |
int *dstCharsPtr)); |
210 |
static int TableToUtfProc _ANSI_ARGS_((ClientData clientData, |
211 |
CONST char *src, int srcLen, int flags, |
212 |
Tcl_EncodingState *statePtr, char *dst, int dstLen, |
213 |
int *srcReadPtr, int *dstWrotePtr, |
214 |
int *dstCharsPtr)); |
215 |
static size_t unilen _ANSI_ARGS_((CONST char *src)); |
216 |
static int UnicodeToUtfProc _ANSI_ARGS_((ClientData clientData, |
217 |
CONST char *src, int srcLen, int flags, |
218 |
Tcl_EncodingState *statePtr, char *dst, int dstLen, |
219 |
int *srcReadPtr, int *dstWrotePtr, |
220 |
int *dstCharsPtr)); |
221 |
static int UtfToUnicodeProc _ANSI_ARGS_((ClientData clientData, |
222 |
CONST char *src, int srcLen, int flags, |
223 |
Tcl_EncodingState *statePtr, char *dst, int dstLen, |
224 |
int *srcReadPtr, int *dstWrotePtr, |
225 |
int *dstCharsPtr)); |
226 |
static int UtfToUtfProc _ANSI_ARGS_((ClientData clientData, |
227 |
CONST char *src, int srcLen, int flags, |
228 |
Tcl_EncodingState *statePtr, char *dst, int dstLen, |
229 |
int *srcReadPtr, int *dstWrotePtr, |
230 |
int *dstCharsPtr)); |
231 |
|
232 |
|
233 |
/* |
234 |
*--------------------------------------------------------------------------- |
235 |
* |
236 |
* TclInitEncodingSubsystem -- |
237 |
* |
238 |
* Initialize all resources used by this subsystem on a per-process |
239 |
* basis. |
240 |
* |
241 |
* Results: |
242 |
* None. |
243 |
* |
244 |
* Side effects: |
245 |
* Depends on the memory, object, and IO subsystems. |
246 |
* |
247 |
*--------------------------------------------------------------------------- |
248 |
*/ |
249 |
|
250 |
void |
251 |
TclInitEncodingSubsystem() |
252 |
{ |
253 |
Tcl_EncodingType type; |
254 |
|
255 |
Tcl_MutexLock(&encodingMutex); |
256 |
Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS); |
257 |
Tcl_MutexUnlock(&encodingMutex); |
258 |
|
259 |
/* |
260 |
* Create a few initial encodings. Note that the UTF-8 to UTF-8 |
261 |
* translation is not a no-op, because it will turn a stream of |
262 |
* improperly formed UTF-8 into a properly formed stream. |
263 |
*/ |
264 |
|
265 |
type.encodingName = "identity"; |
266 |
type.toUtfProc = BinaryProc; |
267 |
type.fromUtfProc = BinaryProc; |
268 |
type.freeProc = NULL; |
269 |
type.nullSize = 1; |
270 |
type.clientData = NULL; |
271 |
|
272 |
defaultEncoding = Tcl_CreateEncoding(&type); |
273 |
systemEncoding = Tcl_GetEncoding(NULL, type.encodingName); |
274 |
|
275 |
type.encodingName = "utf-8"; |
276 |
type.toUtfProc = UtfToUtfProc; |
277 |
type.fromUtfProc = UtfToUtfProc; |
278 |
type.freeProc = NULL; |
279 |
type.nullSize = 1; |
280 |
type.clientData = NULL; |
281 |
Tcl_CreateEncoding(&type); |
282 |
|
283 |
type.encodingName = "unicode"; |
284 |
type.toUtfProc = UnicodeToUtfProc; |
285 |
type.fromUtfProc = UtfToUnicodeProc; |
286 |
type.freeProc = NULL; |
287 |
type.nullSize = 2; |
288 |
type.clientData = NULL; |
289 |
Tcl_CreateEncoding(&type); |
290 |
} |
291 |
|
292 |
|
293 |
/* |
294 |
*---------------------------------------------------------------------- |
295 |
* |
296 |
* TclFinalizeEncodingSubsystem -- |
297 |
* |
298 |
* Release the state associated with the encoding subsystem. |
299 |
* |
300 |
* Results: |
301 |
* None. |
302 |
* |
303 |
* Side effects: |
304 |
* Frees all of the encodings. |
305 |
* |
306 |
*---------------------------------------------------------------------- |
307 |
*/ |
308 |
|
309 |
void |
310 |
TclFinalizeEncodingSubsystem() |
311 |
{ |
312 |
Tcl_HashSearch search; |
313 |
Tcl_HashEntry *hPtr; |
314 |
Encoding *encodingPtr; |
315 |
|
316 |
Tcl_MutexLock(&encodingMutex); |
317 |
encodingsInitialized = 0; |
318 |
hPtr = Tcl_FirstHashEntry(&encodingTable, &search); |
319 |
while (hPtr != NULL) { |
320 |
encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr); |
321 |
if (encodingPtr->freeProc != NULL) { |
322 |
(*encodingPtr->freeProc)(encodingPtr->clientData); |
323 |
} |
324 |
ckfree((char *) encodingPtr->name); |
325 |
ckfree((char *) encodingPtr); |
326 |
hPtr = Tcl_NextHashEntry(&search); |
327 |
} |
328 |
Tcl_DeleteHashTable(&encodingTable); |
329 |
Tcl_MutexUnlock(&encodingMutex); |
330 |
} |
331 |
|
332 |
/* |
333 |
*------------------------------------------------------------------------- |
334 |
* |
335 |
* Tcl_GetDefaultEncodingDir -- |
336 |
* |
337 |
* |
338 |
* Results: |
339 |
* |
340 |
* Side effects: |
341 |
* |
342 |
*------------------------------------------------------------------------- |
343 |
*/ |
344 |
|
345 |
char * |
346 |
Tcl_GetDefaultEncodingDir() |
347 |
{ |
348 |
return tclDefaultEncodingDir; |
349 |
} |
350 |
|
351 |
/* |
352 |
*------------------------------------------------------------------------- |
353 |
* |
354 |
* Tcl_SetDefaultEncodingDir -- |
355 |
* |
356 |
* |
357 |
* Results: |
358 |
* |
359 |
* Side effects: |
360 |
* |
361 |
*------------------------------------------------------------------------- |
362 |
*/ |
363 |
|
364 |
void |
365 |
Tcl_SetDefaultEncodingDir(path) |
366 |
char *path; |
367 |
{ |
368 |
tclDefaultEncodingDir = (char *)ckalloc((unsigned) strlen(path) + 1); |
369 |
strcpy(tclDefaultEncodingDir, path); |
370 |
} |
371 |
|
372 |
/* |
373 |
*------------------------------------------------------------------------- |
374 |
* |
375 |
* Tcl_GetEncoding -- |
376 |
* |
377 |
* Given the name of a encoding, find the corresponding Tcl_Encoding |
378 |
* token. If the encoding did not already exist, Tcl attempts to |
379 |
* dynamically load an encoding by that name. |
380 |
* |
381 |
* Results: |
382 |
* Returns a token that represents the encoding. If the name didn't |
383 |
* refer to any known or loadable encoding, NULL is returned. If |
384 |
* NULL was returned, an error message is left in interp's result |
385 |
* object, unless interp was NULL. |
386 |
* |
387 |
* Side effects: |
388 |
* The new encoding type is entered into a table visible to all |
389 |
* interpreters, keyed off the encoding's name. For each call to |
390 |
* this procedure, there should eventually be a call to |
391 |
* Tcl_FreeEncoding, so that the database can be cleaned up when |
392 |
* encodings aren't needed anymore. |
393 |
* |
394 |
*------------------------------------------------------------------------- |
395 |
*/ |
396 |
|
397 |
Tcl_Encoding |
398 |
Tcl_GetEncoding(interp, name) |
399 |
Tcl_Interp *interp; /* Interp for error reporting, if not NULL. */ |
400 |
CONST char *name; /* The name of the desired encoding. */ |
401 |
{ |
402 |
Tcl_HashEntry *hPtr; |
403 |
Encoding *encodingPtr; |
404 |
|
405 |
Tcl_MutexLock(&encodingMutex); |
406 |
if (name == NULL) { |
407 |
encodingPtr = (Encoding *) systemEncoding; |
408 |
encodingPtr->refCount++; |
409 |
Tcl_MutexUnlock(&encodingMutex); |
410 |
return systemEncoding; |
411 |
} |
412 |
|
413 |
hPtr = Tcl_FindHashEntry(&encodingTable, name); |
414 |
if (hPtr != NULL) { |
415 |
encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr); |
416 |
encodingPtr->refCount++; |
417 |
Tcl_MutexUnlock(&encodingMutex); |
418 |
return (Tcl_Encoding) encodingPtr; |
419 |
} |
420 |
Tcl_MutexUnlock(&encodingMutex); |
421 |
return LoadEncodingFile(interp, name); |
422 |
} |
423 |
|
424 |
/* |
425 |
*--------------------------------------------------------------------------- |
426 |
* |
427 |
* Tcl_FreeEncoding -- |
428 |
* |
429 |
* This procedure is called to release an encoding allocated by |
430 |
* Tcl_CreateEncoding() or Tcl_GetEncoding(). |
431 |
* |
432 |
* Results: |
433 |
* None. |
434 |
* |
435 |
* Side effects: |
436 |
* The reference count associated with the encoding is decremented |
437 |
* and the encoding may be deleted if nothing is using it anymore. |
438 |
* |
439 |
*--------------------------------------------------------------------------- |
440 |
*/ |
441 |
|
442 |
void |
443 |
Tcl_FreeEncoding(encoding) |
444 |
Tcl_Encoding encoding; |
445 |
{ |
446 |
Tcl_MutexLock(&encodingMutex); |
447 |
FreeEncoding(encoding); |
448 |
Tcl_MutexUnlock(&encodingMutex); |
449 |
} |
450 |
|
451 |
/* |
452 |
*---------------------------------------------------------------------- |
453 |
* |
454 |
* FreeEncoding -- |
455 |
* |
456 |
* This procedure is called to release an encoding by procedures |
457 |
* that already have the encodingMutex. |
458 |
* |
459 |
* Results: |
460 |
* None. |
461 |
* |
462 |
* Side effects: |
463 |
* The reference count associated with the encoding is decremented |
464 |
* and the encoding may be deleted if nothing is using it anymore. |
465 |
* |
466 |
*---------------------------------------------------------------------- |
467 |
*/ |
468 |
|
469 |
static void |
470 |
FreeEncoding(encoding) |
471 |
Tcl_Encoding encoding; |
472 |
{ |
473 |
Encoding *encodingPtr; |
474 |
|
475 |
encodingPtr = (Encoding *) encoding; |
476 |
if (encodingPtr == NULL) { |
477 |
return; |
478 |
} |
479 |
encodingPtr->refCount--; |
480 |
if (encodingPtr->refCount == 0) { |
481 |
if (encodingPtr->freeProc != NULL) { |
482 |
(*encodingPtr->freeProc)(encodingPtr->clientData); |
483 |
} |
484 |
if (encodingPtr->hPtr != NULL) { |
485 |
Tcl_DeleteHashEntry(encodingPtr->hPtr); |
486 |
} |
487 |
ckfree((char *) encodingPtr->name); |
488 |
ckfree((char *) encodingPtr); |
489 |
} |
490 |
} |
491 |
|
492 |
/* |
493 |
*------------------------------------------------------------------------- |
494 |
* |
495 |
* Tcl_GetEncodingName -- |
496 |
* |
497 |
* Given an encoding, return the name that was used to constuct |
498 |
* the encoding. |
499 |
* |
500 |
* Results: |
501 |
* The name of the encoding. |
502 |
* |
503 |
* Side effects: |
504 |
* None. |
505 |
* |
506 |
*--------------------------------------------------------------------------- |
507 |
*/ |
508 |
|
509 |
char * |
510 |
Tcl_GetEncodingName(encoding) |
511 |
Tcl_Encoding encoding; /* The encoding whose name to fetch. */ |
512 |
{ |
513 |
Encoding *encodingPtr; |
514 |
|
515 |
if (encoding == NULL) { |
516 |
encoding = systemEncoding; |
517 |
} |
518 |
encodingPtr = (Encoding *) encoding; |
519 |
return encodingPtr->name; |
520 |
} |
521 |
|
522 |
/* |
523 |
*------------------------------------------------------------------------- |
524 |
* |
525 |
* Tcl_GetEncodingNames -- |
526 |
* |
527 |
* Get the list of all known encodings, including the ones stored |
528 |
* as files on disk in the encoding path. |
529 |
* |
530 |
* Results: |
531 |
* Modifies interp's result object to hold a list of all the available |
532 |
* encodings. |
533 |
* |
534 |
* Side effects: |
535 |
* None. |
536 |
* |
537 |
*------------------------------------------------------------------------- |
538 |
*/ |
539 |
|
540 |
void |
541 |
Tcl_GetEncodingNames(interp) |
542 |
Tcl_Interp *interp; /* Interp to hold result. */ |
543 |
{ |
544 |
Tcl_HashSearch search; |
545 |
Tcl_HashEntry *hPtr; |
546 |
Tcl_Obj *pathPtr, *resultPtr; |
547 |
int dummy; |
548 |
|
549 |
Tcl_HashTable table; |
550 |
|
551 |
Tcl_MutexLock(&encodingMutex); |
552 |
Tcl_InitHashTable(&table, TCL_STRING_KEYS); |
553 |
hPtr = Tcl_FirstHashEntry(&encodingTable, &search); |
554 |
while (hPtr != NULL) { |
555 |
Encoding *encodingPtr; |
556 |
|
557 |
encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr); |
558 |
Tcl_CreateHashEntry(&table, encodingPtr->name, &dummy); |
559 |
hPtr = Tcl_NextHashEntry(&search); |
560 |
} |
561 |
Tcl_MutexUnlock(&encodingMutex); |
562 |
|
563 |
pathPtr = TclGetLibraryPath(); |
564 |
if (pathPtr != NULL) { |
565 |
int i, objc; |
566 |
Tcl_Obj **objv; |
567 |
Tcl_DString pwdString; |
568 |
char globArgString[10]; |
569 |
|
570 |
objc = 0; |
571 |
Tcl_ListObjGetElements(NULL, pathPtr, &objc, &objv); |
572 |
|
573 |
Tcl_GetCwd(interp, &pwdString); |
574 |
|
575 |
for (i = 0; i < objc; i++) { |
576 |
char *string; |
577 |
int j, objc2, length; |
578 |
Tcl_Obj **objv2; |
579 |
|
580 |
string = Tcl_GetStringFromObj(objv[i], NULL); |
581 |
Tcl_ResetResult(interp); |
582 |
|
583 |
/* |
584 |
* TclGlob() changes the contents of globArgString, which causes |
585 |
* a segfault if we pass in a pointer to non-writeable memory. |
586 |
* TclGlob() puts its results directly into interp. |
587 |
*/ |
588 |
|
589 |
strcpy(globArgString, "*.enc"); |
590 |
if ((Tcl_Chdir(string) == 0) |
591 |
&& (Tcl_Chdir("encoding") == 0) |
592 |
&& (TclGlob(interp, globArgString, NULL, 0, NULL) == TCL_OK)) { |
593 |
objc2 = 0; |
594 |
|
595 |
Tcl_ListObjGetElements(NULL, Tcl_GetObjResult(interp), &objc2, |
596 |
&objv2); |
597 |
|
598 |
for (j = 0; j < objc2; j++) { |
599 |
string = Tcl_GetStringFromObj(objv2[j], &length); |
600 |
length -= 4; |
601 |
if (length > 0) { |
602 |
string[length] = '\0'; |
603 |
Tcl_CreateHashEntry(&table, string, &dummy); |
604 |
string[length] = '.'; |
605 |
} |
606 |
} |
607 |
} |
608 |
Tcl_Chdir(Tcl_DStringValue(&pwdString)); |
609 |
} |
610 |
Tcl_DStringFree(&pwdString); |
611 |
} |
612 |
|
613 |
/* |
614 |
* Clear any values placed in the result by globbing. |
615 |
*/ |
616 |
|
617 |
Tcl_ResetResult(interp); |
618 |
resultPtr = Tcl_GetObjResult(interp); |
619 |
|
620 |
hPtr = Tcl_FirstHashEntry(&table, &search); |
621 |
while (hPtr != NULL) { |
622 |
Tcl_Obj *strPtr; |
623 |
|
624 |
strPtr = Tcl_NewStringObj(Tcl_GetHashKey(&table, hPtr), -1); |
625 |
Tcl_ListObjAppendElement(NULL, resultPtr, strPtr); |
626 |
hPtr = Tcl_NextHashEntry(&search); |
627 |
} |
628 |
Tcl_DeleteHashTable(&table); |
629 |
} |
630 |
|
631 |
/* |
632 |
*------------------------------------------------------------------------ |
633 |
* |
634 |
* Tcl_SetSystemEncoding -- |
635 |
* |
636 |
* Sets the default encoding that should be used whenever the user |
637 |
* passes a NULL value in to one of the conversion routines. |
638 |
* If the supplied name is NULL, the system encoding is reset to the |
639 |
* default system encoding. |
640 |
* |
641 |
* Results: |
642 |
* The return value is TCL_OK if the system encoding was successfully |
643 |
* set to the encoding specified by name, TCL_ERROR otherwise. If |
644 |
* TCL_ERROR is returned, an error message is left in interp's result |
645 |
* object, unless interp was NULL. |
646 |
* |
647 |
* Side effects: |
648 |
* The reference count of the new system encoding is incremented. |
649 |
* The reference count of the old system encoding is decremented and |
650 |
* it may be freed. |
651 |
* |
652 |
*------------------------------------------------------------------------ |
653 |
*/ |
654 |
|
655 |
int |
656 |
Tcl_SetSystemEncoding(interp, name) |
657 |
Tcl_Interp *interp; /* Interp for error reporting, if not NULL. */ |
658 |
CONST char *name; /* The name of the desired encoding, or NULL |
659 |
* to reset to default encoding. */ |
660 |
{ |
661 |
Tcl_Encoding encoding; |
662 |
Encoding *encodingPtr; |
663 |
|
664 |
if (name == NULL) { |
665 |
Tcl_MutexLock(&encodingMutex); |
666 |
encoding = defaultEncoding; |
667 |
encodingPtr = (Encoding *) encoding; |
668 |
encodingPtr->refCount++; |
669 |
Tcl_MutexUnlock(&encodingMutex); |
670 |
} else { |
671 |
encoding = Tcl_GetEncoding(interp, name); |
672 |
if (encoding == NULL) { |
673 |
return TCL_ERROR; |
674 |
} |
675 |
} |
676 |
|
677 |
Tcl_MutexLock(&encodingMutex); |
678 |
FreeEncoding(systemEncoding); |
679 |
systemEncoding = encoding; |
680 |
Tcl_MutexUnlock(&encodingMutex); |
681 |
|
682 |
return TCL_OK; |
683 |
} |
684 |
|
685 |
/* |
686 |
*--------------------------------------------------------------------------- |
687 |
* |
688 |
* Tcl_CreateEncoding -- |
689 |
* |
690 |
* This procedure is called to define a new encoding and the procedures |
691 |
* that are used to convert between the specified encoding and Unicode. |
692 |
* |
693 |
* Results: |
694 |
* Returns a token that represents the encoding. If an encoding with |
695 |
* the same name already existed, the old encoding token remains |
696 |
* valid and continues to behave as it used to, and will eventually |
697 |
* be garbage collected when the last reference to it goes away. Any |
698 |
* subsequent calls to Tcl_GetEncoding with the specified name will |
699 |
* retrieve the most recent encoding token. |
700 |
* |
701 |
* Side effects: |
702 |
* The new encoding type is entered into a table visible to all |
703 |
* interpreters, keyed off the encoding's name. For each call to |
704 |
* this procedure, there should eventually be a call to |
705 |
* Tcl_FreeEncoding, so that the database can be cleaned up when |
706 |
* encodings aren't needed anymore. |
707 |
* |
708 |
*--------------------------------------------------------------------------- |
709 |
*/ |
710 |
|
711 |
Tcl_Encoding |
712 |
Tcl_CreateEncoding(typePtr) |
713 |
Tcl_EncodingType *typePtr; /* The encoding type. */ |
714 |
{ |
715 |
Tcl_HashEntry *hPtr; |
716 |
int new; |
717 |
Encoding *encodingPtr; |
718 |
char *name; |
719 |
|
720 |
Tcl_MutexLock(&encodingMutex); |
721 |
hPtr = Tcl_CreateHashEntry(&encodingTable, typePtr->encodingName, &new); |
722 |
if (new == 0) { |
723 |
/* |
724 |
* Remove old encoding from hash table, but don't delete it until |
725 |
* last reference goes away. |
726 |
*/ |
727 |
|
728 |
encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr); |
729 |
encodingPtr->hPtr = NULL; |
730 |
} |
731 |
|
732 |
name = ckalloc((unsigned) strlen(typePtr->encodingName) + 1); |
733 |
|
734 |
encodingPtr = (Encoding *) ckalloc(sizeof(Encoding)); |
735 |
encodingPtr->name = strcpy(name, typePtr->encodingName); |
736 |
encodingPtr->toUtfProc = typePtr->toUtfProc; |
737 |
encodingPtr->fromUtfProc = typePtr->fromUtfProc; |
738 |
encodingPtr->freeProc = typePtr->freeProc; |
739 |
encodingPtr->nullSize = typePtr->nullSize; |
740 |
encodingPtr->clientData = typePtr->clientData; |
741 |
if (typePtr->nullSize == 1) { |
742 |
encodingPtr->lengthProc = (LengthProc *) strlen; |
743 |
} else { |
744 |
encodingPtr->lengthProc = (LengthProc *) unilen; |
745 |
} |
746 |
encodingPtr->refCount = 1; |
747 |
encodingPtr->hPtr = hPtr; |
748 |
Tcl_SetHashValue(hPtr, encodingPtr); |
749 |
|
750 |
Tcl_MutexUnlock(&encodingMutex); |
751 |
|
752 |
return (Tcl_Encoding) encodingPtr; |
753 |
} |
754 |
|
755 |
/* |
756 |
*------------------------------------------------------------------------- |
757 |
* |
758 |
* Tcl_ExternalToUtfDString -- |
759 |
* |
760 |
* Convert a source buffer from the specified encoding into UTF-8. |
761 |
* If any of the bytes in the source buffer are invalid or cannot |
762 |
* be represented in the target encoding, a default fallback |
763 |
* character will be substituted. |
764 |
* |
765 |
* Results: |
766 |
* The converted bytes are stored in the DString, which is then NULL |
767 |
* terminated. The return value is a pointer to the value stored |
768 |
* in the DString. |
769 |
* |
770 |
* Side effects: |
771 |
* None. |
772 |
* |
773 |
*------------------------------------------------------------------------- |
774 |
*/ |
775 |
|
776 |
char * |
777 |
Tcl_ExternalToUtfDString(encoding, src, srcLen, dstPtr) |
778 |
Tcl_Encoding encoding; /* The encoding for the source string, or |
779 |
* NULL for the default system encoding. */ |
780 |
CONST char *src; /* Source string in specified encoding. */ |
781 |
int srcLen; /* Source string length in bytes, or < 0 for |
782 |
* encoding-specific string length. */ |
783 |
Tcl_DString *dstPtr; /* Uninitialized or free DString in which |
784 |
* the converted string is stored. */ |
785 |
{ |
786 |
char *dst; |
787 |
Tcl_EncodingState state; |
788 |
Encoding *encodingPtr; |
789 |
int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars; |
790 |
|
791 |
Tcl_DStringInit(dstPtr); |
792 |
dst = Tcl_DStringValue(dstPtr); |
793 |
dstLen = dstPtr->spaceAvl - 1; |
794 |
|
795 |
if (encoding == NULL) { |
796 |
encoding = systemEncoding; |
797 |
} |
798 |
encodingPtr = (Encoding *) encoding; |
799 |
|
800 |
if (src == NULL) { |
801 |
srcLen = 0; |
802 |
} else if (srcLen < 0) { |
803 |
srcLen = (*encodingPtr->lengthProc)(src); |
804 |
} |
805 |
flags = TCL_ENCODING_START | TCL_ENCODING_END; |
806 |
while (1) { |
807 |
result = (*encodingPtr->toUtfProc)(encodingPtr->clientData, src, |
808 |
srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote, |
809 |
&dstChars); |
810 |
soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); |
811 |
if (result != TCL_CONVERT_NOSPACE) { |
812 |
Tcl_DStringSetLength(dstPtr, soFar); |
813 |
return Tcl_DStringValue(dstPtr); |
814 |
} |
815 |
flags &= ~TCL_ENCODING_START; |
816 |
src += srcRead; |
817 |
srcLen -= srcRead; |
818 |
if (Tcl_DStringLength(dstPtr) == 0) { |
819 |
Tcl_DStringSetLength(dstPtr, dstLen); |
820 |
} |
821 |
Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1); |
822 |
dst = Tcl_DStringValue(dstPtr) + soFar; |
823 |
dstLen = Tcl_DStringLength(dstPtr) - soFar - 1; |
824 |
} |
825 |
} |
826 |
|
827 |
/* |
828 |
*------------------------------------------------------------------------- |
829 |
* |
830 |
* Tcl_ExternalToUtf -- |
831 |
* |
832 |
* Convert a source buffer from the specified encoding into UTF-8, |
833 |
* |
834 |
* Results: |
835 |
* The return value is one of TCL_OK, TCL_CONVERT_MULTIBYTE, |
836 |
* TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE, |
837 |
* as documented in tcl.h. |
838 |
* |
839 |
* Side effects: |
840 |
* The converted bytes are stored in the output buffer. |
841 |
* |
842 |
*------------------------------------------------------------------------- |
843 |
*/ |
844 |
|
845 |
int |
846 |
Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst, |
847 |
dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr) |
848 |
Tcl_Interp *interp; /* Interp for error return, if not NULL. */ |
849 |
Tcl_Encoding encoding; /* The encoding for the source string, or |
850 |
* NULL for the default system encoding. */ |
851 |
CONST char *src; /* Source string in specified encoding. */ |
852 |
int srcLen; /* Source string length in bytes, or < 0 for |
853 |
* encoding-specific string length. */ |
854 |
int flags; /* Conversion control flags. */ |
855 |
Tcl_EncodingState *statePtr;/* Place for conversion routine to store |
856 |
* state information used during a piecewise |
857 |
* conversion. Contents of statePtr are |
858 |
* initialized and/or reset by conversion |
859 |
* routine under control of flags argument. */ |
860 |
char *dst; /* Output buffer in which converted string |
861 |
* is stored. */ |
862 |
int dstLen; /* The maximum length of output buffer in |
863 |
* bytes. */ |
864 |
int *srcReadPtr; /* Filled with the number of bytes from the |
865 |
* source string that were converted. This |
866 |
* may be less than the original source length |
867 |
* if there was a problem converting some |
868 |
* source characters. */ |
869 |
int *dstWrotePtr; /* Filled with the number of bytes that were |
870 |
* stored in the output buffer as a result of |
871 |
* the conversion. */ |
872 |
int *dstCharsPtr; /* Filled with the number of characters that |
873 |
* correspond to the bytes stored in the |
874 |
* output buffer. */ |
875 |
{ |
876 |
Encoding *encodingPtr; |
877 |
int result, srcRead, dstWrote, dstChars; |
878 |
Tcl_EncodingState state; |
879 |
|
880 |
if (encoding == NULL) { |
881 |
encoding = systemEncoding; |
882 |
} |
883 |
encodingPtr = (Encoding *) encoding; |
884 |
|
885 |
if (src == NULL) { |
886 |
srcLen = 0; |
887 |
} else if (srcLen < 0) { |
888 |
srcLen = (*encodingPtr->lengthProc)(src); |
889 |
} |
890 |
if (statePtr == NULL) { |
891 |
flags |= TCL_ENCODING_START | TCL_ENCODING_END; |
892 |
statePtr = &state; |
893 |
} |
894 |
if (srcReadPtr == NULL) { |
895 |
srcReadPtr = &srcRead; |
896 |
} |
897 |
if (dstWrotePtr == NULL) { |
898 |
dstWrotePtr = &dstWrote; |
899 |
} |
900 |
if (dstCharsPtr == NULL) { |
901 |
dstCharsPtr = &dstChars; |
902 |
} |
903 |
|
904 |
/* |
905 |
* If there are any null characters in the middle of the buffer, they will |
906 |
* converted to the UTF-8 null character (\xC080). To get the actual |
907 |
* \0 at the end of the destination buffer, we need to append it manually. |
908 |
*/ |
909 |
|
910 |
dstLen--; |
911 |
result = (*encodingPtr->toUtfProc)(encodingPtr->clientData, src, srcLen, |
912 |
flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, |
913 |
dstCharsPtr); |
914 |
dst[*dstWrotePtr] = '\0'; |
915 |
return result; |
916 |
} |
917 |
|
918 |
/* |
919 |
*------------------------------------------------------------------------- |
920 |
* |
921 |
* Tcl_UtfToExternalDString -- |
922 |
* |
923 |
* Convert a source buffer from UTF-8 into the specified encoding. |
924 |
* If any of the bytes in the source buffer are invalid or cannot |
925 |
* be represented in the target encoding, a default fallback |
926 |
* character will be substituted. |
927 |
* |
928 |
* Results: |
929 |
* The converted bytes are stored in the DString, which is then |
930 |
* NULL terminated in an encoding-specific manner. The return value |
931 |
* is a pointer to the value stored in the DString. |
932 |
* |
933 |
* Side effects: |
934 |
* None. |
935 |
* |
936 |
*------------------------------------------------------------------------- |
937 |
*/ |
938 |
|
939 |
char * |
940 |
Tcl_UtfToExternalDString(encoding, src, srcLen, dstPtr) |
941 |
Tcl_Encoding encoding; /* The encoding for the converted string, |
942 |
* or NULL for the default system encoding. */ |
943 |
CONST char *src; /* Source string in UTF-8. */ |
944 |
int srcLen; /* Source string length in bytes, or < 0 for |
945 |
* strlen(). */ |
946 |
Tcl_DString *dstPtr; /* Uninitialized or free DString in which |
947 |
* the converted string is stored. */ |
948 |
{ |
949 |
char *dst; |
950 |
Tcl_EncodingState state; |
951 |
Encoding *encodingPtr; |
952 |
int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars; |
953 |
|
954 |
Tcl_DStringInit(dstPtr); |
955 |
dst = Tcl_DStringValue(dstPtr); |
956 |
dstLen = dstPtr->spaceAvl - 1; |
957 |
|
958 |
if (encoding == NULL) { |
959 |
encoding = systemEncoding; |
960 |
} |
961 |
encodingPtr = (Encoding *) encoding; |
962 |
|
963 |
if (src == NULL) { |
964 |
srcLen = 0; |
965 |
} else if (srcLen < 0) { |
966 |
srcLen = strlen(src); |
967 |
} |
968 |
flags = TCL_ENCODING_START | TCL_ENCODING_END; |
969 |
while (1) { |
970 |
result = (*encodingPtr->fromUtfProc)(encodingPtr->clientData, src, |
971 |
srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote, |
972 |
&dstChars); |
973 |
soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); |
974 |
if (result != TCL_CONVERT_NOSPACE) { |
975 |
if (encodingPtr->nullSize == 2) { |
976 |
Tcl_DStringSetLength(dstPtr, soFar + 1); |
977 |
} |
978 |
Tcl_DStringSetLength(dstPtr, soFar); |
979 |
return Tcl_DStringValue(dstPtr); |
980 |
} |
981 |
flags &= ~TCL_ENCODING_START; |
982 |
src += srcRead; |
983 |
srcLen -= srcRead; |
984 |
if (Tcl_DStringLength(dstPtr) == 0) { |
985 |
Tcl_DStringSetLength(dstPtr, dstLen); |
986 |
} |
987 |
Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1); |
988 |
dst = Tcl_DStringValue(dstPtr) + soFar; |
989 |
dstLen = Tcl_DStringLength(dstPtr) - soFar - 1; |
990 |
} |
991 |
} |
992 |
|
993 |
/* |
994 |
*------------------------------------------------------------------------- |
995 |
* |
996 |
* Tcl_UtfToExternal -- |
997 |
* |
998 |
* Convert a buffer from UTF-8 into the specified encoding. |
999 |
* |
1000 |
* Results: |
1001 |
* The return value is one of TCL_OK, TCL_CONVERT_MULTIBYTE, |
1002 |
* TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE, |
1003 |
* as documented in tcl.h. |
1004 |
* |
1005 |
* Side effects: |
1006 |
* The converted bytes are stored in the output buffer. |
1007 |
* |
1008 |
*------------------------------------------------------------------------- |
1009 |
*/ |
1010 |
|
1011 |
int |
1012 |
Tcl_UtfToExternal(interp, encoding, src, srcLen, flags, statePtr, dst, |
1013 |
dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr) |
1014 |
Tcl_Interp *interp; /* Interp for error return, if not NULL. */ |
1015 |
Tcl_Encoding encoding; /* The encoding for the converted string, |
1016 |
* or NULL for the default system encoding. */ |
1017 |
CONST char *src; /* Source string in UTF-8. */ |
1018 |
int srcLen; /* Source string length in bytes, or < 0 for |
1019 |
* strlen(). */ |
1020 |
int flags; /* Conversion control flags. */ |
1021 |
Tcl_EncodingState *statePtr;/* Place for conversion routine to store |
1022 |
* state information used during a piecewise |
1023 |
* conversion. Contents of statePtr are |
1024 |
* initialized and/or reset by conversion |
1025 |
* routine under control of flags argument. */ |
1026 |
char *dst; /* Output buffer in which converted string |
1027 |
* is stored. */ |
1028 |
int dstLen; /* The maximum length of output buffer in |
1029 |
* bytes. */ |
1030 |
int *srcReadPtr; /* Filled with the number of bytes from the |
1031 |
* source string that were converted. This |
1032 |
* may be less than the original source length |
1033 |
* if there was a problem converting some |
1034 |
* source characters. */ |
1035 |
int *dstWrotePtr; /* Filled with the number of bytes that were |
1036 |
* stored in the output buffer as a result of |
1037 |
* the conversion. */ |
1038 |
int *dstCharsPtr; /* Filled with the number of characters that |
1039 |
* correspond to the bytes stored in the |
1040 |
* output buffer. */ |
1041 |
{ |
1042 |
Encoding *encodingPtr; |
1043 |
int result, srcRead, dstWrote, dstChars; |
1044 |
Tcl_EncodingState state; |
1045 |
|
1046 |
if (encoding == NULL) { |
1047 |
encoding = systemEncoding; |
1048 |
} |
1049 |
encodingPtr = (Encoding *) encoding; |
1050 |
|
1051 |
if (src == NULL) { |
1052 |
srcLen = 0; |
1053 |
} else if (srcLen < 0) { |
1054 |
srcLen = strlen(src); |
1055 |
} |
1056 |
if (statePtr == NULL) { |
1057 |
flags |= TCL_ENCODING_START | TCL_ENCODING_END; |
1058 |
statePtr = &state; |
1059 |
} |
1060 |
if (srcReadPtr == NULL) { |
1061 |
srcReadPtr = &srcRead; |
1062 |
} |
1063 |
if (dstWrotePtr == NULL) { |
1064 |
dstWrotePtr = &dstWrote; |
1065 |
} |
1066 |
if (dstCharsPtr == NULL) { |
1067 |
dstCharsPtr = &dstChars; |
1068 |
} |
1069 |
|
1070 |
dstLen -= encodingPtr->nullSize; |
1071 |
result = (*encodingPtr->fromUtfProc)(encodingPtr->clientData, src, srcLen, |
1072 |
flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, |
1073 |
dstCharsPtr); |
1074 |
if (encodingPtr->nullSize == 2) { |
1075 |
dst[*dstWrotePtr + 1] = '\0'; |
1076 |
} |
1077 |
dst[*dstWrotePtr] = '\0'; |
1078 |
|
1079 |
return result; |
1080 |
} |
1081 |
|
1082 |
/* |
1083 |
*--------------------------------------------------------------------------- |
1084 |
* |
1085 |
* Tcl_FindExecutable -- |
1086 |
* |
1087 |
* This procedure computes the absolute path name of the current |
1088 |
* application, given its argv[0] value. |
1089 |
* |
1090 |
* Results: |
1091 |
* None. |
1092 |
* |
1093 |
* Side effects: |
1094 |
* The variable tclExecutableName gets filled in with the file |
1095 |
* name for the application, if we figured it out. If we couldn't |
1096 |
* figure it out, tclExecutableName is set to NULL. |
1097 |
* |
1098 |
*--------------------------------------------------------------------------- |
1099 |
*/ |
1100 |
|
1101 |
void |
1102 |
Tcl_FindExecutable(argv0) |
1103 |
CONST char *argv0; /* The value of the application's argv[0] |
1104 |
* (native). */ |
1105 |
{ |
1106 |
CONST char *name; |
1107 |
Tcl_DString buffer, nameString; |
1108 |
|
1109 |
TclInitSubsystems(argv0); |
1110 |
|
1111 |
if (argv0 == NULL) { |
1112 |
goto done; |
1113 |
} |
1114 |
if (tclExecutableName != NULL) { |
1115 |
ckfree(tclExecutableName); |
1116 |
tclExecutableName = NULL; |
1117 |
} |
1118 |
if ((name = TclpFindExecutable(argv0)) == NULL) { |
1119 |
goto done; |
1120 |
} |
1121 |
|
1122 |
/* |
1123 |
* The value returned from TclpNameOfExecutable is a UTF string that |
1124 |
* is possibly dirty depending on when it was initialized. To assure |
1125 |
* that the UTF string is a properly encoded native string for this |
1126 |
* system, convert the UTF string to the default native encoding |
1127 |
* before the default encoding is initialized. Then, convert it back |
1128 |
* to UTF after the system encoding is loaded. |
1129 |
*/ |
1130 |
|
1131 |
Tcl_UtfToExternalDString(NULL, name, -1, &buffer); |
1132 |
TclFindEncodings(argv0); |
1133 |
|
1134 |
/* |
1135 |
* Now it is OK to convert the native string back to UTF and set |
1136 |
* the value of the tclExecutableName. |
1137 |
*/ |
1138 |
|
1139 |
Tcl_ExternalToUtfDString(NULL, Tcl_DStringValue(&buffer), -1, &nameString); |
1140 |
tclExecutableName = (char *) |
1141 |
ckalloc((unsigned) (Tcl_DStringLength(&nameString) + 1)); |
1142 |
strcpy(tclExecutableName, Tcl_DStringValue(&nameString)); |
1143 |
|
1144 |
Tcl_DStringFree(&buffer); |
1145 |
Tcl_DStringFree(&nameString); |
1146 |
return; |
1147 |
|
1148 |
done: |
1149 |
TclFindEncodings(argv0); |
1150 |
} |
1151 |
|
1152 |
/* |
1153 |
*--------------------------------------------------------------------------- |
1154 |
* |
1155 |
* LoadEncodingFile -- |
1156 |
* |
1157 |
* Read a file that describes an encoding and create a new Encoding |
1158 |
* from the data. |
1159 |
* |
1160 |
* Results: |
1161 |
* The return value is the newly loaded Encoding, or NULL if |
1162 |
* the file didn't exist of was in the incorrect format. If NULL was |
1163 |
* returned, an error message is left in interp's result object, |
1164 |
* unless interp was NULL. |
1165 |
* |
1166 |
* Side effects: |
1167 |
* File read from disk. |
1168 |
* |
1169 |
*--------------------------------------------------------------------------- |
1170 |
*/ |
1171 |
|
1172 |
static Tcl_Encoding |
1173 |
LoadEncodingFile(interp, name) |
1174 |
Tcl_Interp *interp; /* Interp for error reporting, if not NULL. */ |
1175 |
CONST char *name; /* The name of the encoding file on disk |
1176 |
* and also the name for new encoding. */ |
1177 |
{ |
1178 |
int objc, i, ch; |
1179 |
Tcl_Obj **objv; |
1180 |
Tcl_Obj *pathPtr; |
1181 |
Tcl_Channel chan; |
1182 |
Tcl_Encoding encoding; |
1183 |
|
1184 |
pathPtr = TclGetLibraryPath(); |
1185 |
if (pathPtr == NULL) { |
1186 |
goto unknown; |
1187 |
} |
1188 |
objc = 0; |
1189 |
Tcl_ListObjGetElements(NULL, pathPtr, &objc, &objv); |
1190 |
|
1191 |
chan = NULL; |
1192 |
for (i = 0; i < objc; i++) { |
1193 |
chan = OpenEncodingFile(Tcl_GetString(objv[i]), name); |
1194 |
if (chan != NULL) { |
1195 |
break; |
1196 |
} |
1197 |
} |
1198 |
|
1199 |
if (chan == NULL) { |
1200 |
goto unknown; |
1201 |
} |
1202 |
|
1203 |
Tcl_SetChannelOption(NULL, chan, "-encoding", "utf-8"); |
1204 |
|
1205 |
while (1) { |
1206 |
Tcl_DString ds; |
1207 |
|
1208 |
Tcl_DStringInit(&ds); |
1209 |
Tcl_Gets(chan, &ds); |
1210 |
ch = Tcl_DStringValue(&ds)[0]; |
1211 |
Tcl_DStringFree(&ds); |
1212 |
if (ch != '#') { |
1213 |
break; |
1214 |
} |
1215 |
} |
1216 |
|
1217 |
encoding = NULL; |
1218 |
switch (ch) { |
1219 |
case 'S': { |
1220 |
encoding = LoadTableEncoding(interp, name, ENCODING_SINGLEBYTE, |
1221 |
chan); |
1222 |
break; |
1223 |
} |
1224 |
case 'D': { |
1225 |
encoding = LoadTableEncoding(interp, name, ENCODING_DOUBLEBYTE, |
1226 |
chan); |
1227 |
break; |
1228 |
} |
1229 |
case 'M': { |
1230 |
encoding = LoadTableEncoding(interp, name, ENCODING_MULTIBYTE, |
1231 |
chan); |
1232 |
break; |
1233 |
} |
1234 |
case 'E': { |
1235 |
encoding = LoadEscapeEncoding(name, chan); |
1236 |
break; |
1237 |
} |
1238 |
} |
1239 |
if ((encoding == NULL) && (interp != NULL)) { |
1240 |
Tcl_AppendResult(interp, "invalid encoding file \"", name, "\"", NULL); |
1241 |
} |
1242 |
Tcl_Close(NULL, chan); |
1243 |
return encoding; |
1244 |
|
1245 |
unknown: |
1246 |
if (interp != NULL) { |
1247 |
Tcl_AppendResult(interp, "unknown encoding \"", name, "\"", NULL); |
1248 |
} |
1249 |
return NULL; |
1250 |
} |
1251 |
|
1252 |
/* |
1253 |
*---------------------------------------------------------------------- |
1254 |
* |
1255 |
* OpenEncodingFile -- |
1256 |
* |
1257 |
* Look for the file encoding/<name>.enc in the specified |
1258 |
* directory. |
1259 |
* |
1260 |
* Results: |
1261 |
* Returns an open file channel if the file exists. |
1262 |
* |
1263 |
* Side effects: |
1264 |
* None. |
1265 |
* |
1266 |
*---------------------------------------------------------------------- |
1267 |
*/ |
1268 |
|
1269 |
static Tcl_Channel |
1270 |
OpenEncodingFile(dir, name) |
1271 |
CONST char *dir; |
1272 |
CONST char *name; |
1273 |
|
1274 |
{ |
1275 |
char *argv[3]; |
1276 |
Tcl_DString pathString; |
1277 |
char *path; |
1278 |
Tcl_Channel chan; |
1279 |
|
1280 |
argv[0] = (char *) dir; |
1281 |
argv[1] = "encoding"; |
1282 |
argv[2] = (char *) name; |
1283 |
|
1284 |
Tcl_DStringInit(&pathString); |
1285 |
Tcl_JoinPath(3, argv, &pathString); |
1286 |
path = Tcl_DStringAppend(&pathString, ".enc", -1); |
1287 |
chan = Tcl_OpenFileChannel(NULL, path, "r", 0); |
1288 |
Tcl_DStringFree(&pathString); |
1289 |
|
1290 |
return chan; |
1291 |
} |
1292 |
|
1293 |
/* |
1294 |
*------------------------------------------------------------------------- |
1295 |
* |
1296 |
* LoadTableEncoding -- |
1297 |
* |
1298 |
* Helper function for LoadEncodingTable(). Loads a table to that |
1299 |
* converts between Unicode and some other encoding and creates an |
1300 |
* encoding (using a TableEncoding structure) from that information. |
1301 |
* |
1302 |
* File contains binary data, but begins with a marker to indicate |
1303 |
* byte-ordering, so that same binary file can be read on either |
1304 |
* endian platforms. |
1305 |
* |
1306 |
* Results: |
1307 |
* The return value is the new encoding, or NULL if the encoding |
1308 |
* could not be created (because the file contained invalid data). |
1309 |
* |
1310 |
* Side effects: |
1311 |
* None. |
1312 |
* |
1313 |
*------------------------------------------------------------------------- |
1314 |
*/ |
1315 |
|
1316 |
static Tcl_Encoding |
1317 |
LoadTableEncoding(interp, name, type, chan) |
1318 |
Tcl_Interp *interp; /* Interp for temporary obj while reading. */ |
1319 |
CONST char *name; /* Name for new encoding. */ |
1320 |
int type; /* Type of encoding (ENCODING_?????). */ |
1321 |
Tcl_Channel chan; /* File containing new encoding. */ |
1322 |
{ |
1323 |
Tcl_DString lineString; |
1324 |
Tcl_Obj *objPtr; |
1325 |
char *line; |
1326 |
int i, hi, lo, numPages, symbol, fallback; |
1327 |
unsigned char used[256]; |
1328 |
unsigned int size; |
1329 |
TableEncodingData *dataPtr; |
1330 |
unsigned short *pageMemPtr; |
1331 |
Tcl_EncodingType encType; |
1332 |
char *hex; |
1333 |
static char staticHex[] = { |
1334 |
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, |
1335 |
10, 11, 12, 13, 14, 15 |
1336 |
}; |
1337 |
|
1338 |
hex = staticHex - '0'; |
1339 |
|
1340 |
Tcl_DStringInit(&lineString); |
1341 |
Tcl_Gets(chan, &lineString); |
1342 |
line = Tcl_DStringValue(&lineString); |
1343 |
|
1344 |
fallback = (int) strtol(line, &line, 16); |
1345 |
symbol = (int) strtol(line, &line, 10); |
1346 |
numPages = (int) strtol(line, &line, 10); |
1347 |
Tcl_DStringFree(&lineString); |
1348 |
|
1349 |
if (numPages < 0) { |
1350 |
numPages = 0; |
1351 |
} else if (numPages > 256) { |
1352 |
numPages = 256; |
1353 |
} |
1354 |
|
1355 |
memset(used, 0, sizeof(used)); |
1356 |
|
1357 |
#undef PAGESIZE |
1358 |
#define PAGESIZE (256 * sizeof(unsigned short)) |
1359 |
|
1360 |
dataPtr = (TableEncodingData *) ckalloc(sizeof(TableEncodingData)); |
1361 |
memset(dataPtr, 0, sizeof(TableEncodingData)); |
1362 |
|
1363 |
dataPtr->fallback = fallback; |
1364 |
|
1365 |
/* |
1366 |
* Read the table that maps characters to Unicode. Performs a single |
1367 |
* malloc to get the memory for the array and all the pages needed by |
1368 |
* the array. |
1369 |
*/ |
1370 |
|
1371 |
size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE; |
1372 |
dataPtr->toUnicode = (unsigned short **) ckalloc(size); |
1373 |
memset(dataPtr->toUnicode, 0, size); |
1374 |
pageMemPtr = (unsigned short *) (dataPtr->toUnicode + 256); |
1375 |
|
1376 |
if (interp == NULL) { |
1377 |
objPtr = Tcl_NewObj(); |
1378 |
} else { |
1379 |
objPtr = Tcl_GetObjResult(interp); |
1380 |
} |
1381 |
for (i = 0; i < numPages; i++) { |
1382 |
int ch; |
1383 |
char *p; |
1384 |
|
1385 |
Tcl_ReadChars(chan, objPtr, 3 + 16 * (16 * 4 + 1), 0); |
1386 |
p = Tcl_GetString(objPtr); |
1387 |
hi = (hex[(int)p[0]] << 4) + hex[(int)p[1]]; |
1388 |
dataPtr->toUnicode[hi] = pageMemPtr; |
1389 |
p += 2; |
1390 |
for (lo = 0; lo < 256; lo++) { |
1391 |
if ((lo & 0x0f) == 0) { |
1392 |
p++; |
1393 |
} |
1394 |
ch = (hex[(int)p[0]] << 12) + (hex[(int)p[1]] << 8) |
1395 |
+ (hex[(int)p[2]] << 4) + hex[(int)p[3]]; |
1396 |
if (ch != 0) { |
1397 |
used[ch >> 8] = 1; |
1398 |
} |
1399 |
*pageMemPtr = (unsigned short) ch; |
1400 |
pageMemPtr++; |
1401 |
p += 4; |
1402 |
} |
1403 |
} |
1404 |
if (interp == NULL) { |
1405 |
Tcl_DecrRefCount(objPtr); |
1406 |
} else { |
1407 |
Tcl_ResetResult(interp); |
1408 |
} |
1409 |
|
1410 |
if (type == ENCODING_DOUBLEBYTE) { |
1411 |
memset(dataPtr->prefixBytes, 1, sizeof(dataPtr->prefixBytes)); |
1412 |
} else { |
1413 |
for (hi = 1; hi < 256; hi++) { |
1414 |
if (dataPtr->toUnicode[hi] != NULL) { |
1415 |
dataPtr->prefixBytes[hi] = 1; |
1416 |
} |
1417 |
} |
1418 |
} |
1419 |
|
1420 |
/* |
1421 |
* Invert toUnicode array to produce the fromUnicode array. Performs a |
1422 |
* single malloc to get the memory for the array and all the pages |
1423 |
* needed by the array. While reading in the toUnicode array, we |
1424 |
* remembered what pages that would be needed for the fromUnicode array. |
1425 |
*/ |
1426 |
|
1427 |
if (symbol) { |
1428 |
used[0] = 1; |
1429 |
} |
1430 |
numPages = 0; |
1431 |
for (hi = 0; hi < 256; hi++) { |
1432 |
if (used[hi]) { |
1433 |
numPages++; |
1434 |
} |
1435 |
} |
1436 |
size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE; |
1437 |
dataPtr->fromUnicode = (unsigned short **) ckalloc(size); |
1438 |
memset(dataPtr->fromUnicode, 0, size); |
1439 |
pageMemPtr = (unsigned short *) (dataPtr->fromUnicode + 256); |
1440 |
|
1441 |
for (hi = 0; hi < 256; hi++) { |
1442 |
if (dataPtr->toUnicode[hi] == NULL) { |
1443 |
dataPtr->toUnicode[hi] = emptyPage; |
1444 |
} else { |
1445 |
for (lo = 0; lo < 256; lo++) { |
1446 |
int ch; |
1447 |
|
1448 |
ch = dataPtr->toUnicode[hi][lo]; |
1449 |
if (ch != 0) { |
1450 |
unsigned short *page; |
1451 |
|
1452 |
page = dataPtr->fromUnicode[ch >> 8]; |
1453 |
if (page == NULL) { |
1454 |
page = pageMemPtr; |
1455 |
pageMemPtr += 256; |
1456 |
dataPtr->fromUnicode[ch >> 8] = page; |
1457 |
} |
1458 |
page[ch & 0xff] = (unsigned short) ((hi << 8) + lo); |
1459 |
} |
1460 |
} |
1461 |
} |
1462 |
} |
1463 |
if (type == ENCODING_MULTIBYTE) { |
1464 |
/* |
1465 |
* If multibyte encodings don't have a backslash character, define |
1466 |
* one. Otherwise, on Windows, native file names won't work because |
1467 |
* the backslash in the file name will map to the unknown character |
1468 |
* (question mark) when converting from UTF-8 to external encoding. |
1469 |
*/ |
1470 |
|
1471 |
if (dataPtr->fromUnicode[0] != NULL) { |
1472 |
if (dataPtr->fromUnicode[0]['\\'] == '\0') { |
1473 |
dataPtr->fromUnicode[0]['\\'] = '\\'; |
1474 |
} |
1475 |
} |
1476 |
} |
1477 |
if (symbol) { |
1478 |
unsigned short *page; |
1479 |
|
1480 |
/* |
1481 |
* Make a special symbol encoding that not only maps the symbol |
1482 |
* characters from their Unicode code points down into page 0, but |
1483 |
* also ensure that the characters on page 0 map to themselves. |
1484 |
* This is so that a symbol font can be used to display a simple |
1485 |
* string like "abcd" and have alpha, beta, chi, delta show up, |
1486 |
* rather than have "unknown" chars show up because strictly |
1487 |
* speaking the symbol font doesn't have glyphs for those low ascii |
1488 |
* chars. |
1489 |
*/ |
1490 |
|
1491 |
page = dataPtr->fromUnicode[0]; |
1492 |
if (page == NULL) { |
1493 |
page = pageMemPtr; |
1494 |
dataPtr->fromUnicode[0] = page; |
1495 |
} |
1496 |
for (lo = 0; lo < 256; lo++) { |
1497 |
if (dataPtr->toUnicode[0][lo] != 0) { |
1498 |
page[lo] = (unsigned short) lo; |
1499 |
} |
1500 |
} |
1501 |
} |
1502 |
for (hi = 0; hi < 256; hi++) { |
1503 |
if (dataPtr->fromUnicode[hi] == NULL) { |
1504 |
dataPtr->fromUnicode[hi] = emptyPage; |
1505 |
} |
1506 |
} |
1507 |
encType.encodingName = name; |
1508 |
encType.toUtfProc = TableToUtfProc; |
1509 |
encType.fromUtfProc = TableFromUtfProc; |
1510 |
encType.freeProc = TableFreeProc; |
1511 |
encType.nullSize = (type == ENCODING_DOUBLEBYTE) ? 2 : 1; |
1512 |
encType.clientData = (ClientData) dataPtr; |
1513 |
return Tcl_CreateEncoding(&encType); |
1514 |
|
1515 |
} |
1516 |
|
1517 |
/* |
1518 |
*------------------------------------------------------------------------- |
1519 |
* |
1520 |
* LoadEscapeEncoding -- |
1521 |
* |
1522 |
* Helper function for LoadEncodingTable(). Loads a state machine |
1523 |
* that converts between Unicode and some other encoding. |
1524 |
* |
1525 |
* File contains text data that describes the escape sequences that |
1526 |
* are used to choose an encoding and the associated names for the |
1527 |
* sub-encodings. |
1528 |
* |
1529 |
* Results: |
1530 |
* The return value is the new encoding, or NULL if the encoding |
1531 |
* could not be created (because the file contained invalid data). |
1532 |
* |
1533 |
* Side effects: |
1534 |
* None. |
1535 |
* |
1536 |
*------------------------------------------------------------------------- |
1537 |
*/ |
1538 |
|
1539 |
static Tcl_Encoding |
1540 |
LoadEscapeEncoding(name, chan) |
1541 |
CONST char *name; /* Name for new encoding. */ |
1542 |
Tcl_Channel chan; /* File containing new encoding. */ |
1543 |
{ |
1544 |
int i; |
1545 |
unsigned int size; |
1546 |
Tcl_DString escapeData; |
1547 |
char init[16], final[16]; |
1548 |
EscapeEncodingData *dataPtr; |
1549 |
Tcl_EncodingType type; |
1550 |
|
1551 |
init[0] = '\0'; |
1552 |
final[0] = '\0'; |
1553 |
Tcl_DStringInit(&escapeData); |
1554 |
|
1555 |
while (1) { |
1556 |
int argc; |
1557 |
char **argv; |
1558 |
char *line; |
1559 |
Tcl_DString lineString; |
1560 |
|
1561 |
Tcl_DStringInit(&lineString); |
1562 |
if (Tcl_Gets(chan, &lineString) < 0) { |
1563 |
break; |
1564 |
} |
1565 |
line = Tcl_DStringValue(&lineString); |
1566 |
if (Tcl_SplitList(NULL, line, &argc, &argv) != TCL_OK) { |
1567 |
continue; |
1568 |
} |
1569 |
if (argc >= 2) { |
1570 |
if (strcmp(argv[0], "name") == 0) { |
1571 |
; |
1572 |
} else if (strcmp(argv[0], "init") == 0) { |
1573 |
strncpy(init, argv[1], sizeof(init)); |
1574 |
init[sizeof(init) - 1] = '\0'; |
1575 |
} else if (strcmp(argv[0], "final") == 0) { |
1576 |
strncpy(final, argv[1], sizeof(final)); |
1577 |
final[sizeof(final) - 1] = '\0'; |
1578 |
} else { |
1579 |
EscapeSubTable est; |
1580 |
|
1581 |
strncpy(est.sequence, argv[1], sizeof(est.sequence)); |
1582 |
est.sequence[sizeof(est.sequence) - 1] = '\0'; |
1583 |
est.sequenceLen = strlen(est.sequence); |
1584 |
|
1585 |
strncpy(est.name, argv[0], sizeof(est.name)); |
1586 |
est.name[sizeof(est.name) - 1] = '\0'; |
1587 |
|
1588 |
est.encodingPtr = NULL; |
1589 |
Tcl_DStringAppend(&escapeData, (char *) &est, sizeof(est)); |
1590 |
} |
1591 |
} |
1592 |
ckfree((char *) argv); |
1593 |
Tcl_DStringFree(&lineString); |
1594 |
} |
1595 |
|
1596 |
size = sizeof(EscapeEncodingData) |
1597 |
- sizeof(EscapeSubTable) + Tcl_DStringLength(&escapeData); |
1598 |
dataPtr = (EscapeEncodingData *) ckalloc(size); |
1599 |
dataPtr->initLen = strlen(init); |
1600 |
strcpy(dataPtr->init, init); |
1601 |
dataPtr->finalLen = strlen(final); |
1602 |
strcpy(dataPtr->final, final); |
1603 |
dataPtr->numSubTables = Tcl_DStringLength(&escapeData) / sizeof(EscapeSubTable); |
1604 |
memcpy((VOID *) dataPtr->subTables, (VOID *) Tcl_DStringValue(&escapeData), |
1605 |
(size_t) Tcl_DStringLength(&escapeData)); |
1606 |
Tcl_DStringFree(&escapeData); |
1607 |
|
1608 |
memset(dataPtr->prefixBytes, 0, sizeof(dataPtr->prefixBytes)); |
1609 |
for (i = 0; i < dataPtr->numSubTables; i++) { |
1610 |
dataPtr->prefixBytes[UCHAR(dataPtr->subTables[i].sequence[0])] = 1; |
1611 |
} |
1612 |
if (dataPtr->init[0] != '\0') { |
1613 |
dataPtr->prefixBytes[UCHAR(dataPtr->init[0])] = 1; |
1614 |
} |
1615 |
if (dataPtr->final[0] != '\0') { |
1616 |
dataPtr->prefixBytes[UCHAR(dataPtr->final[0])] = 1; |
1617 |
} |
1618 |
|
1619 |
type.encodingName = name; |
1620 |
type.toUtfProc = EscapeToUtfProc; |
1621 |
type.fromUtfProc = EscapeFromUtfProc; |
1622 |
type.freeProc = EscapeFreeProc; |
1623 |
type.nullSize = 1; |
1624 |
type.clientData = (ClientData) dataPtr; |
1625 |
|
1626 |
return Tcl_CreateEncoding(&type); |
1627 |
} |
1628 |
|
1629 |
/* |
1630 |
*------------------------------------------------------------------------- |
1631 |
* |
1632 |
* BinaryProc -- |
1633 |
* |
1634 |
* The default conversion when no other conversion is specified. |
1635 |
* No translation is done; source bytes are copied directly to |
1636 |
* destination bytes. |
1637 |
* |
1638 |
* Results: |
1639 |
* Returns TCL_OK if conversion was successful. |
1640 |
* |
1641 |
* Side effects: |
1642 |
* None. |
1643 |
* |
1644 |
*------------------------------------------------------------------------- |
1645 |
*/ |
1646 |
|
1647 |
static int |
1648 |
BinaryProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, |
1649 |
srcReadPtr, dstWrotePtr, dstCharsPtr) |
1650 |
ClientData clientData; /* Not used. */ |
1651 |
CONST char *src; /* Source string (unknown encoding). */ |
1652 |
int srcLen; /* Source string length in bytes. */ |
1653 |
int flags; /* Conversion control flags. */ |
1654 |
Tcl_EncodingState *statePtr;/* Place for conversion routine to store |
1655 |
* state information used during a piecewise |
1656 |
* conversion. Contents of statePtr are |
1657 |
* initialized and/or reset by conversion |
1658 |
* routine under control of flags argument. */ |
1659 |
char *dst; /* Output buffer in which converted string |
1660 |
* is stored. */ |
1661 |
int dstLen; /* The maximum length of output buffer in |
1662 |
* bytes. */ |
1663 |
int *srcReadPtr; /* Filled with the number of bytes from the |
1664 |
* source string that were converted. */ |
1665 |
int *dstWrotePtr; /* Filled with the number of bytes that were |
1666 |
* stored in the output buffer as a result of |
1667 |
* the conversion. */ |
1668 |
int *dstCharsPtr; /* Filled with the number of characters that |
1669 |
* correspond to the bytes stored in the |
1670 |
* output buffer. */ |
1671 |
{ |
1672 |
int result; |
1673 |
|
1674 |
result = TCL_OK; |
1675 |
dstLen -= TCL_UTF_MAX - 1; |
1676 |
if (dstLen < 0) { |
1677 |
dstLen = 0; |
1678 |
} |
1679 |
if (srcLen > dstLen) { |
1680 |
srcLen = dstLen; |
1681 |
result = TCL_CONVERT_NOSPACE; |
1682 |
} |
1683 |
|
1684 |
*srcReadPtr = srcLen; |
1685 |
*dstWrotePtr = srcLen; |
1686 |
*dstCharsPtr = srcLen; |
1687 |
for ( ; --srcLen >= 0; ) { |
1688 |
*dst++ = *src++; |
1689 |
} |
1690 |
return result; |
1691 |
} |
1692 |
|
1693 |
/* |
1694 |
*------------------------------------------------------------------------- |
1695 |
* |
1696 |
* UtfToUtfProc -- |
1697 |
* |
1698 |
* Convert from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 |
1699 |
* translation is not a no-op, because it will turn a stream of |
1700 |
* improperly formed UTF-8 into a properly formed stream. |
1701 |
* |
1702 |
* Results: |
1703 |
* Returns TCL_OK if conversion was successful. |
1704 |
* |
1705 |
* Side effects: |
1706 |
* None. |
1707 |
* |
1708 |
*------------------------------------------------------------------------- |
1709 |
*/ |
1710 |
|
1711 |
static int |
1712 |
UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, |
1713 |
srcReadPtr, dstWrotePtr, dstCharsPtr) |
1714 |
ClientData clientData; /* Not used. */ |
1715 |
CONST char *src; /* Source string in UTF-8. */ |
1716 |
int srcLen; /* Source string length in bytes. */ |
1717 |
int flags; /* Conversion control flags. */ |
1718 |
Tcl_EncodingState *statePtr;/* Place for conversion routine to store |
1719 |
* state information used during a piecewise |
1720 |
* conversion. Contents of statePtr are |
1721 |
* initialized and/or reset by conversion |
1722 |
* routine under control of flags argument. */ |
1723 |
char *dst; /* Output buffer in which converted string |
1724 |
* is stored. */ |
1725 |
int dstLen; /* The maximum length of output buffer in |
1726 |
* bytes. */ |
1727 |
int *srcReadPtr; /* Filled with the number of bytes from the |
1728 |
* source string that were converted. This |
1729 |
* may be less than the original source length |
1730 |
* if there was a problem converting some |
1731 |
* source characters. */ |
1732 |
int *dstWrotePtr; /* Filled with the number of bytes that were |
1733 |
* stored in the output buffer as a result of |
1734 |
* the conversion. */ |
1735 |
int *dstCharsPtr; /* Filled with the number of characters that |
1736 |
* correspond to the bytes stored in the |
1737 |
* output buffer. */ |
1738 |
{ |
1739 |
CONST char *srcStart, *srcEnd, *srcClose; |
1740 |
char *dstStart, *dstEnd; |
1741 |
int result, numChars; |
1742 |
Tcl_UniChar ch; |
1743 |
|
1744 |
result = TCL_OK; |
1745 |
|
1746 |
srcStart = src; |
1747 |
srcEnd = src + srcLen; |
1748 |
srcClose = srcEnd; |
1749 |
if ((flags & TCL_ENCODING_END) == 0) { |
1750 |
srcClose -= TCL_UTF_MAX; |
1751 |
} |
1752 |
|
1753 |
dstStart = dst; |
1754 |
dstEnd = dst + dstLen - TCL_UTF_MAX; |
1755 |
|
1756 |
for (numChars = 0; src < srcEnd; numChars++) { |
1757 |
if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { |
1758 |
/* |
1759 |
* If there is more string to follow, this will ensure that the |
1760 |
* last UTF-8 character in the source buffer hasn't been cut off. |
1761 |
*/ |
1762 |
|
1763 |
result = TCL_CONVERT_MULTIBYTE; |
1764 |
break; |
1765 |
} |
1766 |
if (dst > dstEnd) { |
1767 |
result = TCL_CONVERT_NOSPACE; |
1768 |
break; |
1769 |
} |
1770 |
src += Tcl_UtfToUniChar(src, &ch); |
1771 |
dst += Tcl_UniCharToUtf(ch, dst); |
1772 |
} |
1773 |
|
1774 |
*srcReadPtr = src - srcStart; |
1775 |
*dstWrotePtr = dst - dstStart; |
1776 |
*dstCharsPtr = numChars; |
1777 |
return result; |
1778 |
} |
1779 |
|
1780 |
/* |
1781 |
*------------------------------------------------------------------------- |
1782 |
* |
1783 |
* UnicodeToUtfProc -- |
1784 |
* |
1785 |
* Convert from Unicode to UTF-8. |
1786 |
* |
1787 |
* Results: |
1788 |
* Returns TCL_OK if conversion was successful. |
1789 |
* |
1790 |
* Side effects: |
1791 |
* None. |
1792 |
* |
1793 |
*------------------------------------------------------------------------- |
1794 |
*/ |
1795 |
|
1796 |
static int |
1797 |
UnicodeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, |
1798 |
srcReadPtr, dstWrotePtr, dstCharsPtr) |
1799 |
ClientData clientData; /* Not used. */ |
1800 |
CONST char *src; /* Source string in Unicode. */ |
1801 |
int srcLen; /* Source string length in bytes. */ |
1802 |
int flags; /* Conversion control flags. */ |
1803 |
Tcl_EncodingState *statePtr;/* Place for conversion routine to store |
1804 |
* state information used during a piecewise |
1805 |
* conversion. Contents of statePtr are |
1806 |
* initialized and/or reset by conversion |
1807 |
* routine under control of flags argument. */ |
1808 |
char *dst; /* Output buffer in which converted string |
1809 |
* is stored. */ |
1810 |
int dstLen; /* The maximum length of output buffer in |
1811 |
* bytes. */ |
1812 |
int *srcReadPtr; /* Filled with the number of bytes from the |
1813 |
* source string that were converted. This |
1814 |
* may be less than the original source length |
1815 |
* if there was a problem converting some |
1816 |
* source characters. */ |
1817 |
int *dstWrotePtr; /* Filled with the number of bytes that were |
1818 |
* stored in the output buffer as a result of |
1819 |
* the conversion. */ |
1820 |
int *dstCharsPtr; /* Filled with the number of characters that |
1821 |
* correspond to the bytes stored in the |
1822 |
* output buffer. */ |
1823 |
{ |
1824 |
CONST Tcl_UniChar *wSrc, *wSrcStart, *wSrcEnd; |
1825 |
char *dstEnd, *dstStart; |
1826 |
int result, numChars; |
1827 |
|
1828 |
result = TCL_OK; |
1829 |
if ((srcLen % sizeof(Tcl_UniChar)) != 0) { |
1830 |
result = TCL_CONVERT_MULTIBYTE; |
1831 |
srcLen /= sizeof(Tcl_UniChar); |
1832 |
srcLen *= sizeof(Tcl_UniChar); |
1833 |
} |
1834 |
|
1835 |
wSrc = (Tcl_UniChar *) src; |
1836 |
|
1837 |
wSrcStart = (Tcl_UniChar *) src; |
1838 |
wSrcEnd = (Tcl_UniChar *) (src + srcLen); |
1839 |
|
1840 |
dstStart = dst; |
1841 |
dstEnd = dst + dstLen - TCL_UTF_MAX; |
1842 |
|
1843 |
for (numChars = 0; wSrc < wSrcEnd; numChars++) { |
1844 |
if (dst > dstEnd) { |
1845 |
result = TCL_CONVERT_NOSPACE; |
1846 |
break; |
1847 |
} |
1848 |
dst += Tcl_UniCharToUtf(*wSrc, dst); |
1849 |
wSrc++; |
1850 |
} |
1851 |
|
1852 |
*srcReadPtr = (char *) wSrc - (char *) wSrcStart; |
1853 |
*dstWrotePtr = dst - dstStart; |
1854 |
*dstCharsPtr = numChars; |
1855 |
return result; |
1856 |
} |
1857 |
|
1858 |
/* |
1859 |
*------------------------------------------------------------------------- |
1860 |
* |
1861 |
* UtfToUnicodeProc -- |
1862 |
* |
1863 |
* Convert from UTF-8 to Unicode. |
1864 |
* |
1865 |
* Results: |
1866 |
* Returns TCL_OK if conversion was successful. |
1867 |
* |
1868 |
* Side effects: |
1869 |
* None. |
1870 |
* |
1871 |
*------------------------------------------------------------------------- |
1872 |
*/ |
1873 |
|
1874 |
static int |
1875 |
UtfToUnicodeProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, |
1876 |
srcReadPtr, dstWrotePtr, dstCharsPtr) |
1877 |
ClientData clientData; /* TableEncodingData that specifies encoding. */ |
1878 |
CONST char *src; /* Source string in UTF-8. */ |
1879 |
int srcLen; /* Source string length in bytes. */ |
1880 |
int flags; /* Conversion control flags. */ |
1881 |
Tcl_EncodingState *statePtr;/* Place for conversion routine to store |
1882 |
* state information used during a piecewise |
1883 |
* conversion. Contents of statePtr are |
1884 |
* initialized and/or reset by conversion |
1885 |
* routine under control of flags argument. */ |
1886 |
char *dst; /* Output buffer in which converted string |
1887 |
* is stored. */ |
1888 |
int dstLen; /* The maximum length of output buffer in |
1889 |
* bytes. */ |
1890 |
int *srcReadPtr; /* Filled with the number of bytes from the |
1891 |
* source string that were converted. This |
1892 |
* may be less than the original source length |
1893 |
* if there was a problem converting some |
1894 |
* source characters. */ |
1895 |
int *dstWrotePtr; /* Filled with the number of bytes that were |
1896 |
* stored in the output buffer as a result of |
1897 |
* the conversion. */ |
1898 |
int *dstCharsPtr; /* Filled with the number of characters that |
1899 |
* correspond to the bytes stored in the |
1900 |
* output buffer. */ |
1901 |
{ |
1902 |
CONST char *srcStart, *srcEnd, *srcClose; |
1903 |
Tcl_UniChar *wDst, *wDstStart, *wDstEnd; |
1904 |
int result, numChars; |
1905 |
|
1906 |
srcStart = src; |
1907 |
srcEnd = src + srcLen; |
1908 |
srcClose = srcEnd; |
1909 |
if ((flags & TCL_ENCODING_END) == 0) { |
1910 |
srcClose -= TCL_UTF_MAX; |
1911 |
} |
1912 |
|
1913 |
wDst = (Tcl_UniChar *) dst; |
1914 |
wDstStart = (Tcl_UniChar *) dst; |
1915 |
wDstEnd = (Tcl_UniChar *) (dst + dstLen - sizeof(Tcl_UniChar)); |
1916 |
|
1917 |
result = TCL_OK; |
1918 |
for (numChars = 0; src < srcEnd; numChars++) { |
1919 |
if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { |
1920 |
/* |
1921 |
* If there is more string to follow, this will ensure that the |
1922 |
* last UTF-8 character in the source buffer hasn't been cut off. |
1923 |
*/ |
1924 |
|
1925 |
result = TCL_CONVERT_MULTIBYTE; |
1926 |
break; |
1927 |
} |
1928 |
if (wDst > wDstEnd) { |
1929 |
result = TCL_CONVERT_NOSPACE; |
1930 |
break; |
1931 |
} |
1932 |
src += Tcl_UtfToUniChar(src, wDst); |
1933 |
wDst++; |
1934 |
} |
1935 |
*srcReadPtr = src - srcStart; |
1936 |
*dstWrotePtr = (char *) wDst - (char *) wDstStart; |
1937 |
*dstCharsPtr = numChars; |
1938 |
return result; |
1939 |
} |
1940 |
|
1941 |
/* |
1942 |
*------------------------------------------------------------------------- |
1943 |
* |
1944 |
* TableToUtfProc -- |
1945 |
* |
1946 |
* Convert from the encoding specified by the TableEncodingData into |
1947 |
* UTF-8. |
1948 |
* |
1949 |
* Results: |
1950 |
* Returns TCL_OK if conversion was successful. |
1951 |
* |
1952 |
* Side effects: |
1953 |
* None. |
1954 |
* |
1955 |
*------------------------------------------------------------------------- |
1956 |
*/ |
1957 |
|
1958 |
static int |
1959 |
TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, |
1960 |
srcReadPtr, dstWrotePtr, dstCharsPtr) |
1961 |
ClientData clientData; /* TableEncodingData that specifies |
1962 |
* encoding. */ |
1963 |
CONST char *src; /* Source string in specified encoding. */ |
1964 |
int srcLen; /* Source string length in bytes. */ |
1965 |
int flags; /* Conversion control flags. */ |
1966 |
Tcl_EncodingState *statePtr;/* Place for conversion routine to store |
1967 |
* state information used during a piecewise |
1968 |
* conversion. Contents of statePtr are |
1969 |
* initialized and/or reset by conversion |
1970 |
* routine under control of flags argument. */ |
1971 |
char *dst; /* Output buffer in which converted string |
1972 |
* is stored. */ |
1973 |
int dstLen; /* The maximum length of output buffer in |
1974 |
* bytes. */ |
1975 |
int *srcReadPtr; /* Filled with the number of bytes from the |
1976 |
* source string that were converted. This |
1977 |
* may be less than the original source length |
1978 |
* if there was a problem converting some |
1979 |
* source characters. */ |
1980 |
int *dstWrotePtr; /* Filled with the number of bytes that were |
1981 |
* stored in the output buffer as a result of |
1982 |
* the conversion. */ |
1983 |
int *dstCharsPtr; /* Filled with the number of characters that |
1984 |
* correspond to the bytes stored in the |
1985 |
* output buffer. */ |
1986 |
{ |
1987 |
CONST char *srcStart, *srcEnd; |
1988 |
char *dstEnd, *dstStart, *prefixBytes; |
1989 |
int result, byte, numChars; |
1990 |
Tcl_UniChar ch; |
1991 |
unsigned short **toUnicode; |
1992 |
unsigned short *pageZero; |
1993 |
TableEncodingData *dataPtr; |
1994 |
|
1995 |
srcStart = src; |
1996 |
srcEnd = src + srcLen; |
1997 |
|
1998 |
dstStart = dst; |
1999 |
dstEnd = dst + dstLen - TCL_UTF_MAX; |
2000 |
|
2001 |
dataPtr = (TableEncodingData *) clientData; |
2002 |
toUnicode = dataPtr->toUnicode; |
2003 |
prefixBytes = dataPtr->prefixBytes; |
2004 |
pageZero = toUnicode[0]; |
2005 |
|
2006 |
result = TCL_OK; |
2007 |
for (numChars = 0; src < srcEnd; numChars++) { |
2008 |
if (dst > dstEnd) { |
2009 |
result = TCL_CONVERT_NOSPACE; |
2010 |
break; |
2011 |
} |
2012 |
byte = *((unsigned char *) src); |
2013 |
if (prefixBytes[byte]) { |
2014 |
src++; |
2015 |
if (src >= srcEnd) { |
2016 |
src--; |
2017 |
result = TCL_CONVERT_MULTIBYTE; |
2018 |
break; |
2019 |
} |
2020 |
ch = toUnicode[byte][*((unsigned char *) src)]; |
2021 |
} else { |
2022 |
ch = pageZero[byte]; |
2023 |
} |
2024 |
if ((ch == 0) && (byte != 0)) { |
2025 |
if (flags & TCL_ENCODING_STOPONERROR) { |
2026 |
result = TCL_CONVERT_SYNTAX; |
2027 |
break; |
2028 |
} |
2029 |
if (prefixBytes[byte]) { |
2030 |
src--; |
2031 |
} |
2032 |
ch = (Tcl_UniChar) byte; |
2033 |
} |
2034 |
dst += Tcl_UniCharToUtf(ch, dst); |
2035 |
src++; |
2036 |
} |
2037 |
*srcReadPtr = src - srcStart; |
2038 |
*dstWrotePtr = dst - dstStart; |
2039 |
*dstCharsPtr = numChars; |
2040 |
return result; |
2041 |
} |
2042 |
|
2043 |
/* |
2044 |
*------------------------------------------------------------------------- |
2045 |
* |
2046 |
* TableFromUtfProc -- |
2047 |
* |
2048 |
* Convert from UTF-8 into the encoding specified by the |
2049 |
* TableEncodingData. |
2050 |
* |
2051 |
* Results: |
2052 |
* Returns TCL_OK if conversion was successful. |
2053 |
* |
2054 |
* Side effects: |
2055 |
* None. |
2056 |
* |
2057 |
*------------------------------------------------------------------------- |
2058 |
*/ |
2059 |
|
2060 |
static int |
2061 |
TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, |
2062 |
srcReadPtr, dstWrotePtr, dstCharsPtr) |
2063 |
ClientData clientData; /* TableEncodingData that specifies |
2064 |
* encoding. */ |
2065 |
CONST char *src; /* Source string in UTF-8. */ |
2066 |
int srcLen; /* Source string length in bytes. */ |
2067 |
int flags; /* Conversion control flags. */ |
2068 |
Tcl_EncodingState *statePtr;/* Place for conversion routine to store |
2069 |
* state information used during a piecewise |
2070 |
* conversion. Contents of statePtr are |
2071 |
* initialized and/or reset by conversion |
2072 |
* routine under control of flags argument. */ |
2073 |
char *dst; /* Output buffer in which converted string |
2074 |
* is stored. */ |
2075 |
int dstLen; /* The maximum length of output buffer in |
2076 |
* bytes. */ |
2077 |
int *srcReadPtr; /* Filled with the number of bytes from the |
2078 |
* source string that were converted. This |
2079 |
* may be less than the original source length |
2080 |
* if there was a problem converting some |
2081 |
* source characters. */ |
2082 |
int *dstWrotePtr; /* Filled with the number of bytes that were |
2083 |
* stored in the output buffer as a result of |
2084 |
* the conversion. */ |
2085 |
int *dstCharsPtr; /* Filled with the number of characters that |
2086 |
* correspond to the bytes stored in the |
2087 |
* output buffer. */ |
2088 |
{ |
2089 |
CONST char *srcStart, *srcEnd, *srcClose; |
2090 |
char *dstStart, *dstEnd, *prefixBytes; |
2091 |
Tcl_UniChar ch; |
2092 |
int result, len, word, numChars; |
2093 |
TableEncodingData *dataPtr; |
2094 |
unsigned short **fromUnicode; |
2095 |
|
2096 |
result = TCL_OK; |
2097 |
|
2098 |
dataPtr = (TableEncodingData *) clientData; |
2099 |
prefixBytes = dataPtr->prefixBytes; |
2100 |
fromUnicode = dataPtr->fromUnicode; |
2101 |
|
2102 |
srcStart = src; |
2103 |
srcEnd = src + srcLen; |
2104 |
srcClose = srcEnd; |
2105 |
if ((flags & TCL_ENCODING_END) == 0) { |
2106 |
srcClose -= TCL_UTF_MAX; |
2107 |
} |
2108 |
|
2109 |
dstStart = dst; |
2110 |
dstEnd = dst + dstLen - 1; |
2111 |
|
2112 |
for (numChars = 0; src < srcEnd; numChars++) { |
2113 |
if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { |
2114 |
/* |
2115 |
* If there is more string to follow, this will ensure that the |
2116 |
* last UTF-8 character in the source buffer hasn't been cut off. |
2117 |
*/ |
2118 |
|
2119 |
result = TCL_CONVERT_MULTIBYTE; |
2120 |
break; |
2121 |
} |
2122 |
len = Tcl_UtfToUniChar(src, &ch); |
2123 |
word = fromUnicode[(ch >> 8)][ch & 0xff]; |
2124 |
if ((word == 0) && (ch != 0)) { |
2125 |
if (flags & TCL_ENCODING_STOPONERROR) { |
2126 |
result = TCL_CONVERT_UNKNOWN; |
2127 |
break; |
2128 |
} |
2129 |
word = dataPtr->fallback; |
2130 |
} |
2131 |
if (prefixBytes[(word >> 8)] != 0) { |
2132 |
if (dst + 1 > dstEnd) { |
2133 |
result = TCL_CONVERT_NOSPACE; |
2134 |
break; |
2135 |
} |
2136 |
dst[0] = (char) (word >> 8); |
2137 |
dst[1] = (char) word; |
2138 |
dst += 2; |
2139 |
} else { |
2140 |
if (dst > dstEnd) { |
2141 |
result = TCL_CONVERT_NOSPACE; |
2142 |
break; |
2143 |
} |
2144 |
dst[0] = (char) word; |
2145 |
dst++; |
2146 |
} |
2147 |
src += len; |
2148 |
} |
2149 |
*srcReadPtr = src - srcStart; |
2150 |
*dstWrotePtr = dst - dstStart; |
2151 |
*dstCharsPtr = numChars; |
2152 |
return result; |
2153 |
} |
2154 |
|
2155 |
/* |
2156 |
*--------------------------------------------------------------------------- |
2157 |
* |
2158 |
* TableFreeProc -- |
2159 |
* |
2160 |
* This procedure is invoked when an encoding is deleted. It deletes |
2161 |
* the memory used by the TableEncodingData. |
2162 |
* |
2163 |
* Results: |
2164 |
* None. |
2165 |
* |
2166 |
* Side effects: |
2167 |
* Memory freed. |
2168 |
* |
2169 |
*--------------------------------------------------------------------------- |
2170 |
*/ |
2171 |
|
2172 |
static void |
2173 |
TableFreeProc(clientData) |
2174 |
ClientData clientData; /* TableEncodingData that specifies |
2175 |
* encoding. */ |
2176 |
{ |
2177 |
TableEncodingData *dataPtr; |
2178 |
|
2179 |
dataPtr = (TableEncodingData *) clientData; |
2180 |
ckfree((char *) dataPtr->toUnicode); |
2181 |
ckfree((char *) dataPtr->fromUnicode); |
2182 |
ckfree((char *) dataPtr); |
2183 |
} |
2184 |
|
2185 |
/* |
2186 |
*------------------------------------------------------------------------- |
2187 |
* |
2188 |
* EscapeToUtfProc -- |
2189 |
* |
2190 |
* Convert from the encoding specified by the EscapeEncodingData into |
2191 |
* UTF-8. |
2192 |
* |
2193 |
* Results: |
2194 |
* Returns TCL_OK if conversion was successful. |
2195 |
* |
2196 |
* Side effects: |
2197 |
* None. |
2198 |
* |
2199 |
*------------------------------------------------------------------------- |
2200 |
*/ |
2201 |
|
2202 |
static int |
2203 |
EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, |
2204 |
srcReadPtr, dstWrotePtr, dstCharsPtr) |
2205 |
ClientData clientData; /* EscapeEncodingData that specifies |
2206 |
* encoding. */ |
2207 |
CONST char *src; /* Source string in specified encoding. */ |
2208 |
int srcLen; /* Source string length in bytes. */ |
2209 |
int flags; /* Conversion control flags. */ |
2210 |
Tcl_EncodingState *statePtr;/* Place for conversion routine to store |
2211 |
* state information used during a piecewise |
2212 |
* conversion. Contents of statePtr are |
2213 |
* initialized and/or reset by conversion |
2214 |
* routine under control of flags argument. */ |
2215 |
char *dst; /* Output buffer in which converted string |
2216 |
* is stored. */ |
2217 |
int dstLen; /* The maximum length of output buffer in |
2218 |
* bytes. */ |
2219 |
int *srcReadPtr; /* Filled with the number of bytes from the |
2220 |
* source string that were converted. This |
2221 |
* may be less than the original source length |
2222 |
* if there was a problem converting some |
2223 |
* source characters. */ |
2224 |
int *dstWrotePtr; /* Filled with the number of bytes that were |
2225 |
* stored in the output buffer as a result of |
2226 |
* the conversion. */ |
2227 |
int *dstCharsPtr; /* Filled with the number of characters that |
2228 |
* correspond to the bytes stored in the |
2229 |
* output buffer. */ |
2230 |
{ |
2231 |
EscapeEncodingData *dataPtr; |
2232 |
char *prefixBytes, *tablePrefixBytes; |
2233 |
unsigned short **tableToUnicode; |
2234 |
Encoding *encodingPtr; |
2235 |
int state, result, numChars; |
2236 |
CONST char *srcStart, *srcEnd; |
2237 |
char *dstStart, *dstEnd; |
2238 |
|
2239 |
result = TCL_OK; |
2240 |
|
2241 |
tablePrefixBytes = NULL; /* lint. */ |
2242 |
tableToUnicode = NULL; /* lint. */ |
2243 |
|
2244 |
dataPtr = (EscapeEncodingData *) clientData; |
2245 |
prefixBytes = dataPtr->prefixBytes; |
2246 |
encodingPtr = NULL; |
2247 |
|
2248 |
srcStart = src; |
2249 |
srcEnd = src + srcLen; |
2250 |
|
2251 |
dstStart = dst; |
2252 |
dstEnd = dst + dstLen - TCL_UTF_MAX; |
2253 |
|
2254 |
state = (int) *statePtr; |
2255 |
if (flags & TCL_ENCODING_START) { |
2256 |
state = 0; |
2257 |
} |
2258 |
|
2259 |
for (numChars = 0; src < srcEnd; ) { |
2260 |
int byte, hi, lo, ch; |
2261 |
|
2262 |
if (dst > dstEnd) { |
2263 |
result = TCL_CONVERT_NOSPACE; |
2264 |
break; |
2265 |
} |
2266 |
byte = *((unsigned char *) src); |
2267 |
if (prefixBytes[byte]) { |
2268 |
unsigned int left, len, longest; |
2269 |
int checked, i; |
2270 |
EscapeSubTable *subTablePtr; |
2271 |
|
2272 |
/* |
2273 |
* Saw the beginning of an escape sequence. |
2274 |
*/ |
2275 |
|
2276 |
left = srcEnd - src; |
2277 |
len = dataPtr->initLen; |
2278 |
longest = len; |
2279 |
checked = 0; |
2280 |
if (len <= left) { |
2281 |
checked++; |
2282 |
if ((len > 0) && |
2283 |
(memcmp(src, dataPtr->init, len) == 0)) { |
2284 |
/* |
2285 |
* If we see initialization string, skip it, even if we're |
2286 |
* not at the beginning of the buffer. |
2287 |
*/ |
2288 |
|
2289 |
src += len; |
2290 |
continue; |
2291 |
} |
2292 |
} |
2293 |
len = dataPtr->finalLen; |
2294 |
if (len > longest) { |
2295 |
longest = len; |
2296 |
} |
2297 |
if (len <= left) { |
2298 |
checked++; |
2299 |
if ((len > 0) && |
2300 |
(memcmp(src, dataPtr->final, len) == 0)) { |
2301 |
/* |
2302 |
* If we see finalization string, skip it, even if we're |
2303 |
* not at the end of the buffer. |
2304 |
*/ |
2305 |
|
2306 |
src += len; |
2307 |
continue; |
2308 |
} |
2309 |
} |
2310 |
subTablePtr = dataPtr->subTables; |
2311 |
for (i = 0; i < dataPtr->numSubTables; i++) { |
2312 |
len = subTablePtr->sequenceLen; |
2313 |
if (len > longest) { |
2314 |
longest = len; |
2315 |
} |
2316 |
if (len <= left) { |
2317 |
checked++; |
2318 |
if ((len > 0) && |
2319 |
(memcmp(src, subTablePtr->sequence, len) == 0)) { |
2320 |
state = i; |
2321 |
encodingPtr = NULL; |
2322 |
subTablePtr = NULL; |
2323 |
src += len; |
2324 |
break; |
2325 |
} |
2326 |
} |
2327 |
subTablePtr++; |
2328 |
} |
2329 |
if (subTablePtr == NULL) { |
2330 |
/* |
2331 |
* A match was found, the escape sequence was consumed, and |
2332 |
* the state was updated. |
2333 |
*/ |
2334 |
|
2335 |
continue; |
2336 |
} |
2337 |
|
2338 |
/* |
2339 |
* We have a split-up or unrecognized escape sequence. If we |
2340 |
* checked all the sequences, then it's a syntax error, |
2341 |
* otherwise we need more bytes to determine a match. |
2342 |
*/ |
2343 |
|
2344 |
if ((checked == dataPtr->numSubTables + 2) |
2345 |
|| (flags & TCL_ENCODING_END)) { |
2346 |
if ((flags & TCL_ENCODING_STOPONERROR) == 0) { |
2347 |
/* |
2348 |
* Skip the unknown escape sequence. |
2349 |
*/ |
2350 |
|
2351 |
src += longest; |
2352 |
continue; |
2353 |
} |
2354 |
result = TCL_CONVERT_SYNTAX; |
2355 |
} else { |
2356 |
result = TCL_CONVERT_MULTIBYTE; |
2357 |
} |
2358 |
break; |
2359 |
} |
2360 |
|
2361 |
if (encodingPtr == NULL) { |
2362 |
TableEncodingData *tableDataPtr; |
2363 |
|
2364 |
encodingPtr = GetTableEncoding(dataPtr, state); |
2365 |
tableDataPtr = (TableEncodingData *) encodingPtr->clientData; |
2366 |
tablePrefixBytes = tableDataPtr->prefixBytes; |
2367 |
tableToUnicode = tableDataPtr->toUnicode; |
2368 |
} |
2369 |
if (tablePrefixBytes[byte]) { |
2370 |
src++; |
2371 |
if (src >= srcEnd) { |
2372 |
src--; |
2373 |
result = TCL_CONVERT_MULTIBYTE; |
2374 |
break; |
2375 |
} |
2376 |
hi = byte; |
2377 |
lo = *((unsigned char *) src); |
2378 |
} else { |
2379 |
hi = 0; |
2380 |
lo = byte; |
2381 |
} |
2382 |
ch = tableToUnicode[hi][lo]; |
2383 |
dst += Tcl_UniCharToUtf(ch, dst); |
2384 |
src++; |
2385 |
numChars++; |
2386 |
} |
2387 |
|
2388 |
*statePtr = (Tcl_EncodingState) state; |
2389 |
*srcReadPtr = src - srcStart; |
2390 |
*dstWrotePtr = dst - dstStart; |
2391 |
*dstCharsPtr = numChars; |
2392 |
return result; |
2393 |
} |
2394 |
|
2395 |
/* |
2396 |
*------------------------------------------------------------------------- |
2397 |
* |
2398 |
* EscapeFromUtfProc -- |
2399 |
* |
2400 |
* Convert from UTF-8 into the encoding specified by the |
2401 |
* EscapeEncodingData. |
2402 |
* |
2403 |
* Results: |
2404 |
* Returns TCL_OK if conversion was successful. |
2405 |
* |
2406 |
* Side effects: |
2407 |
* None. |
2408 |
* |
2409 |
*------------------------------------------------------------------------- |
2410 |
*/ |
2411 |
|
2412 |
static int |
2413 |
EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, |
2414 |
srcReadPtr, dstWrotePtr, dstCharsPtr) |
2415 |
ClientData clientData; /* EscapeEncodingData that specifies |
2416 |
* encoding. */ |
2417 |
CONST char *src; /* Source string in UTF-8. */ |
2418 |
int srcLen; /* Source string length in bytes. */ |
2419 |
int flags; /* Conversion control flags. */ |
2420 |
Tcl_EncodingState *statePtr;/* Place for conversion routine to store |
2421 |
* state information used during a piecewise |
2422 |
* conversion. Contents of statePtr are |
2423 |
* initialized and/or reset by conversion |
2424 |
* routine under control of flags argument. */ |
2425 |
char *dst; /* Output buffer in which converted string |
2426 |
* is stored. */ |
2427 |
int dstLen; /* The maximum length of output buffer in |
2428 |
* bytes. */ |
2429 |
int *srcReadPtr; /* Filled with the number of bytes from the |
2430 |
* source string that were converted. This |
2431 |
* may be less than the original source length |
2432 |
* if there was a problem converting some |
2433 |
* source characters. */ |
2434 |
int *dstWrotePtr; /* Filled with the number of bytes that were |
2435 |
* stored in the output buffer as a result of |
2436 |
* the conversion. */ |
2437 |
int *dstCharsPtr; /* Filled with the number of characters that |
2438 |
* correspond to the bytes stored in the |
2439 |
* output buffer. */ |
2440 |
{ |
2441 |
EscapeEncodingData *dataPtr; |
2442 |
Encoding *encodingPtr; |
2443 |
CONST char *srcStart, *srcEnd, *srcClose; |
2444 |
char *dstStart, *dstEnd; |
2445 |
int state, result, numChars; |
2446 |
TableEncodingData *tableDataPtr; |
2447 |
char *tablePrefixBytes; |
2448 |
unsigned short **tableFromUnicode; |
2449 |
|
2450 |
result = TCL_OK; |
2451 |
|
2452 |
dataPtr = (EscapeEncodingData *) clientData; |
2453 |
|
2454 |
srcStart = src; |
2455 |
srcEnd = src + srcLen; |
2456 |
srcClose = srcEnd; |
2457 |
if ((flags & TCL_ENCODING_END) == 0) { |
2458 |
srcClose -= TCL_UTF_MAX; |
2459 |
} |
2460 |
|
2461 |
dstStart = dst; |
2462 |
dstEnd = dst + dstLen - 1; |
2463 |
|
2464 |
if (flags & TCL_ENCODING_START) { |
2465 |
unsigned int len; |
2466 |
|
2467 |
state = 0; |
2468 |
len = dataPtr->subTables[0].sequenceLen; |
2469 |
if (dst + dataPtr->initLen + len > dstEnd) { |
2470 |
*srcReadPtr = 0; |
2471 |
*dstWrotePtr = 0; |
2472 |
return TCL_CONVERT_NOSPACE; |
2473 |
} |
2474 |
memcpy((VOID *) dst, (VOID *) dataPtr->init, |
2475 |
(size_t) dataPtr->initLen); |
2476 |
dst += dataPtr->initLen; |
2477 |
memcpy((VOID *) dst, (VOID *) dataPtr->subTables[0].sequence, |
2478 |
(size_t) len); |
2479 |
dst += len; |
2480 |
} else { |
2481 |
state = (int) *statePtr; |
2482 |
} |
2483 |
|
2484 |
encodingPtr = GetTableEncoding(dataPtr, state); |
2485 |
tableDataPtr = (TableEncodingData *) encodingPtr->clientData; |
2486 |
tablePrefixBytes = tableDataPtr->prefixBytes; |
2487 |
tableFromUnicode = tableDataPtr->fromUnicode; |
2488 |
|
2489 |
for (numChars = 0; src < srcEnd; numChars++) { |
2490 |
unsigned int len; |
2491 |
int word; |
2492 |
Tcl_UniChar ch; |
2493 |
|
2494 |
if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { |
2495 |
/* |
2496 |
* If there is more string to follow, this will ensure that the |
2497 |
* last UTF-8 character in the source buffer hasn't been cut off. |
2498 |
*/ |
2499 |
|
2500 |
result = TCL_CONVERT_MULTIBYTE; |
2501 |
break; |
2502 |
} |
2503 |
len = Tcl_UtfToUniChar(src, &ch); |
2504 |
word = tableFromUnicode[(ch >> 8)][ch & 0xff]; |
2505 |
|
2506 |
if ((word == 0) && (ch != 0)) { |
2507 |
int oldState; |
2508 |
EscapeSubTable *subTablePtr; |
2509 |
|
2510 |
oldState = state; |
2511 |
for (state = 0; state < dataPtr->numSubTables; state++) { |
2512 |
encodingPtr = GetTableEncoding(dataPtr, state); |
2513 |
tableDataPtr = (TableEncodingData *) encodingPtr->clientData; |
2514 |
word = tableDataPtr->fromUnicode[(ch >> 8)][ch & 0xff]; |
2515 |
if (word != 0) { |
2516 |
break; |
2517 |
} |
2518 |
} |
2519 |
|
2520 |
if (word == 0) { |
2521 |
state = oldState; |
2522 |
if (flags & TCL_ENCODING_STOPONERROR) { |
2523 |
result = TCL_CONVERT_UNKNOWN; |
2524 |
break; |
2525 |
} |
2526 |
encodingPtr = GetTableEncoding(dataPtr, state); |
2527 |
tableDataPtr = (TableEncodingData *) encodingPtr->clientData; |
2528 |
word = tableDataPtr->fallback; |
2529 |
} |
2530 |
|
2531 |
tablePrefixBytes = tableDataPtr->prefixBytes; |
2532 |
tableFromUnicode = tableDataPtr->fromUnicode; |
2533 |
|
2534 |
subTablePtr = &dataPtr->subTables[state]; |
2535 |
if (dst + subTablePtr->sequenceLen > dstEnd) { |
2536 |
result = TCL_CONVERT_NOSPACE; |
2537 |
break; |
2538 |
} |
2539 |
memcpy((VOID *) dst, (VOID *) subTablePtr->sequence, |
2540 |
(size_t) subTablePtr->sequenceLen); |
2541 |
dst += subTablePtr->sequenceLen; |
2542 |
} |
2543 |
|
2544 |
if (tablePrefixBytes[(word >> 8)] != 0) { |
2545 |
if (dst + 1 > dstEnd) { |
2546 |
result = TCL_CONVERT_NOSPACE; |
2547 |
break; |
2548 |
} |
2549 |
dst[0] = (char) (word >> 8); |
2550 |
dst[1] = (char) word; |
2551 |
dst += 2; |
2552 |
} else { |
2553 |
if (dst > dstEnd) { |
2554 |
result = TCL_CONVERT_NOSPACE; |
2555 |
break; |
2556 |
} |
2557 |
dst[0] = (char) word; |
2558 |
dst++; |
2559 |
} |
2560 |
src += len; |
2561 |
} |
2562 |
|
2563 |
if ((result == TCL_OK) && (flags & TCL_ENCODING_END)) { |
2564 |
if (dst + dataPtr->finalLen > dstEnd) { |
2565 |
result = TCL_CONVERT_NOSPACE; |
2566 |
} else { |
2567 |
memcpy((VOID *) dst, (VOID *) dataPtr->final, |
2568 |
(size_t) dataPtr->finalLen); |
2569 |
dst += dataPtr->finalLen; |
2570 |
} |
2571 |
} |
2572 |
|
2573 |
*statePtr = (Tcl_EncodingState) state; |
2574 |
*srcReadPtr = src - srcStart; |
2575 |
*dstWrotePtr = dst - dstStart; |
2576 |
*dstCharsPtr = numChars; |
2577 |
return result; |
2578 |
} |
2579 |
|
2580 |
/* |
2581 |
*--------------------------------------------------------------------------- |
2582 |
* |
2583 |
* EscapeFreeProc -- |
2584 |
* |
2585 |
* This procedure is invoked when an EscapeEncodingData encoding is |
2586 |
* deleted. It deletes the memory used by the encoding. |
2587 |
* |
2588 |
* Results: |
2589 |
* None. |
2590 |
* |
2591 |
* Side effects: |
2592 |
* Memory freed. |
2593 |
* |
2594 |
*--------------------------------------------------------------------------- |
2595 |
*/ |
2596 |
|
2597 |
static void |
2598 |
EscapeFreeProc(clientData) |
2599 |
ClientData clientData; /* EscapeEncodingData that specifies encoding. */ |
2600 |
{ |
2601 |
EscapeEncodingData *dataPtr; |
2602 |
EscapeSubTable *subTablePtr; |
2603 |
int i; |
2604 |
|
2605 |
dataPtr = (EscapeEncodingData *) clientData; |
2606 |
if (dataPtr == NULL) { |
2607 |
return; |
2608 |
} |
2609 |
subTablePtr = dataPtr->subTables; |
2610 |
for (i = 0; i < dataPtr->numSubTables; i++) { |
2611 |
FreeEncoding((Tcl_Encoding) subTablePtr->encodingPtr); |
2612 |
subTablePtr++; |
2613 |
} |
2614 |
ckfree((char *) dataPtr); |
2615 |
} |
2616 |
|
2617 |
/* |
2618 |
*--------------------------------------------------------------------------- |
2619 |
* |
2620 |
* GetTableEncoding -- |
2621 |
* |
2622 |
* Helper function for the EscapeEncodingData conversions. Gets the |
2623 |
* encoding (of type TextEncodingData) that represents the specified |
2624 |
* state. |
2625 |
* |
2626 |
* Results: |
2627 |
* The return value is the encoding. |
2628 |
* |
2629 |
* Side effects: |
2630 |
* If the encoding that represents the specified state has not |
2631 |
* already been used by this EscapeEncoding, it will be loaded |
2632 |
* and cached in the dataPtr. |
2633 |
* |
2634 |
*--------------------------------------------------------------------------- |
2635 |
*/ |
2636 |
|
2637 |
static Encoding * |
2638 |
GetTableEncoding(dataPtr, state) |
2639 |
EscapeEncodingData *dataPtr;/* Contains names of encodings. */ |
2640 |
int state; /* Index in dataPtr of desired Encoding. */ |
2641 |
{ |
2642 |
EscapeSubTable *subTablePtr; |
2643 |
Encoding *encodingPtr; |
2644 |
|
2645 |
subTablePtr = &dataPtr->subTables[state]; |
2646 |
encodingPtr = subTablePtr->encodingPtr; |
2647 |
if (encodingPtr == NULL) { |
2648 |
encodingPtr = (Encoding *) Tcl_GetEncoding(NULL, subTablePtr->name); |
2649 |
if ((encodingPtr == NULL) |
2650 |
|| (encodingPtr->toUtfProc != TableToUtfProc)) { |
2651 |
panic("EscapeToUtfProc: invalid sub table"); |
2652 |
} |
2653 |
subTablePtr->encodingPtr = encodingPtr; |
2654 |
} |
2655 |
return encodingPtr; |
2656 |
} |
2657 |
|
2658 |
/* |
2659 |
*--------------------------------------------------------------------------- |
2660 |
* |
2661 |
* unilen -- |
2662 |
* |
2663 |
* A helper function for the Tcl_ExternalToUtf functions. This |
2664 |
* function is similar to strlen for double-byte characters: it |
2665 |
* returns the number of bytes in a 0x0000 terminated string. |
2666 |
* |
2667 |
* Results: |
2668 |
* As above. |
2669 |
* |
2670 |
* Side effects: |
2671 |
* None. |
2672 |
* |
2673 |
*--------------------------------------------------------------------------- |
2674 |
*/ |
2675 |
|
2676 |
static size_t |
2677 |
unilen(src) |
2678 |
CONST char *src; |
2679 |
{ |
2680 |
unsigned short *p; |
2681 |
|
2682 |
p = (unsigned short *) src; |
2683 |
while (*p != 0x0000) { |
2684 |
p++; |
2685 |
} |
2686 |
return (char *) p - src; |
2687 |
} |
2688 |
|
2689 |
|
2690 |
/* |
2691 |
*------------------------------------------------------------------------- |
2692 |
* |
2693 |
* TclFindEncodings -- |
2694 |
* |
2695 |
* Find and load the encoding file for this operating system. |
2696 |
* Before this is called, Tcl makes assumptions about the |
2697 |
* native string representation, but the true encoding is not |
2698 |
* assured. |
2699 |
* |
2700 |
* Results: |
2701 |
* None. |
2702 |
* |
2703 |
* Side effects: |
2704 |
* Varied, see the respective initialization routines. |
2705 |
* |
2706 |
*------------------------------------------------------------------------- |
2707 |
*/ |
2708 |
|
2709 |
void |
2710 |
TclFindEncodings(argv0) |
2711 |
CONST char *argv0; /* Name of executable from argv[0] to main() |
2712 |
* in native multi-byte encoding. */ |
2713 |
{ |
2714 |
char *native; |
2715 |
Tcl_Obj *pathPtr; |
2716 |
Tcl_DString libPath, buffer; |
2717 |
|
2718 |
if (encodingsInitialized == 0) { |
2719 |
/* |
2720 |
* Double check inside the mutex. There may be calls |
2721 |
* back into this routine from some of the procedures below. |
2722 |
*/ |
2723 |
|
2724 |
TclpInitLock(); |
2725 |
if (encodingsInitialized == 0) { |
2726 |
/* |
2727 |
* Have to set this bit here to avoid deadlock with the |
2728 |
* routines below us that call into TclInitSubsystems. |
2729 |
*/ |
2730 |
|
2731 |
encodingsInitialized = 1; |
2732 |
|
2733 |
native = TclpFindExecutable(argv0); |
2734 |
TclpInitLibraryPath(native); |
2735 |
|
2736 |
/* |
2737 |
* The library path was set in the TclpInitLibraryPath routine. |
2738 |
* The string set is a dirty UTF string. To preserve the value |
2739 |
* convert the UTF string back to native before setting the new |
2740 |
* default encoding. |
2741 |
*/ |
2742 |
|
2743 |
pathPtr = TclGetLibraryPath(); |
2744 |
if (pathPtr != NULL) { |
2745 |
Tcl_UtfToExternalDString(NULL, Tcl_GetString(pathPtr), -1, |
2746 |
&libPath); |
2747 |
} |
2748 |
|
2749 |
TclpSetInitialEncodings(); |
2750 |
|
2751 |
/* |
2752 |
* Now convert the native string back to UTF. |
2753 |
*/ |
2754 |
|
2755 |
if (pathPtr != NULL) { |
2756 |
Tcl_ExternalToUtfDString(NULL, Tcl_DStringValue(&libPath), -1, |
2757 |
&buffer); |
2758 |
pathPtr = Tcl_NewStringObj(Tcl_DStringValue(&buffer), -1); |
2759 |
TclSetLibraryPath(pathPtr); |
2760 |
|
2761 |
Tcl_DStringFree(&libPath); |
2762 |
Tcl_DStringFree(&buffer); |
2763 |
} |
2764 |
} |
2765 |
TclpInitUnlock(); |
2766 |
} |
2767 |
} |
2768 |
|
2769 |
/* End of tclencoding.c */ |