1 | /* |
---|
2 | * Copyright (c) 2003-2004, Artem B. Bityuckiy |
---|
3 | * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved. |
---|
4 | * |
---|
5 | * Redistribution and use in source and binary forms, with or without |
---|
6 | * modification, are permitted provided that the following conditions |
---|
7 | * are met: |
---|
8 | * 1. Redistributions of source code must retain the above copyright |
---|
9 | * notice, this list of conditions and the following disclaimer. |
---|
10 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
11 | * notice, this list of conditions and the following disclaimer in the |
---|
12 | * documentation and/or other materials provided with the distribution. |
---|
13 | * |
---|
14 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
---|
15 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
---|
18 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
19 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
20 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
21 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
22 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
23 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
24 | * SUCH DAMAGE. |
---|
25 | */ |
---|
26 | #include "cesbi.h" |
---|
27 | |
---|
28 | #if defined (ICONV_TO_UCS_CES_UTF_16) \ |
---|
29 | || defined (ICONV_FROM_UCS_CES_UTF_16) |
---|
30 | |
---|
31 | #include <_ansi.h> |
---|
32 | #include <reent.h> |
---|
33 | #include <sys/types.h> |
---|
34 | #include <stdlib.h> |
---|
35 | #include <string.h> |
---|
36 | #include <wchar.h> |
---|
37 | #include "../lib/local.h" |
---|
38 | #include "../lib/ucsconv.h" |
---|
39 | #include "../lib/endian.h" |
---|
40 | |
---|
41 | /* |
---|
42 | * On input, the UTF-16 converter interprets the BOM and uses Big Endian byte |
---|
43 | * order if the BOM is absent. On output, the UTF-16 converter writes in System |
---|
44 | * Endian and adds the corresponding BOM as the first code. The UTF-16LE and |
---|
45 | * UTF-16BE converters ignore the BOM on input and don't output a BOM. |
---|
46 | */ |
---|
47 | |
---|
48 | #define UTF16_UNDEFINED 0x00 |
---|
49 | #define UTF16_BIG_ENDIAN 0x01 |
---|
50 | #define UTF16_LITTLE_ENDIAN 0x02 |
---|
51 | #define UTF16_SYSTEM_ENDIAN 0x04 |
---|
52 | #define UTF16_BOM_WRITTEN 0x08 |
---|
53 | |
---|
54 | #define UTF16_BOM 0xFEFF |
---|
55 | |
---|
56 | #define UTF_16 "utf_16" |
---|
57 | #define UTF_16BE "utf_16be" |
---|
58 | #define UTF_16LE "utf_16le" |
---|
59 | |
---|
/*
 * utf_16_close - release the converter state.
 *
 * rptr: reentrancy structure handed through to the allocator.
 * data: state block to free (the init handlers in this file allocate it
 *       with _malloc_r).
 *
 * Always returns 0.
 */
static size_t
utf_16_close (struct _reent *rptr,
              void *data)
{
  _free_r (rptr, data);

  return (size_t)0;
}
---|
67 | |
---|
68 | #if defined (ICONV_FROM_UCS_CES_UTF_16) |
---|
69 | static void * |
---|
70 | utf_16_init_from_ucs (struct _reent *rptr, |
---|
71 | const char *encoding) |
---|
72 | { |
---|
73 | int *data; |
---|
74 | |
---|
75 | if ((data = (int *)_malloc_r (rptr, sizeof (int))) == NULL) |
---|
76 | return (void *)NULL; |
---|
77 | |
---|
78 | if (strcmp (encoding, UTF_16LE) == 0) |
---|
79 | *data = UTF16_LITTLE_ENDIAN; |
---|
80 | else if (strcmp (encoding, UTF_16BE) == 0) |
---|
81 | *data = UTF16_BIG_ENDIAN; |
---|
82 | else |
---|
83 | *data = UTF16_SYSTEM_ENDIAN; |
---|
84 | |
---|
85 | return (void *)data; |
---|
86 | } |
---|
87 | |
---|
88 | static size_t |
---|
89 | utf_16_convert_from_ucs (void *data, |
---|
90 | register ucs4_t in, |
---|
91 | unsigned char **outbuf, |
---|
92 | size_t *outbytesleft) |
---|
93 | { |
---|
94 | register ucs2_t *cp; |
---|
95 | register size_t bytes; |
---|
96 | register int *state; |
---|
97 | |
---|
98 | if (in > 0x0010FFFF || (in >= 0x0000D800 && in <= 0x0000DFFF) |
---|
99 | || in == 0x0000FFFF || in == 0x0000FFFE) |
---|
100 | return (size_t)ICONV_CES_INVALID_CHARACTER; |
---|
101 | |
---|
102 | state = (int *)data; |
---|
103 | bytes = (*state == UTF16_SYSTEM_ENDIAN) ? sizeof (ucs2_t) * 2 |
---|
104 | : sizeof (ucs2_t); |
---|
105 | |
---|
106 | if (in > 0x0000FFFF) |
---|
107 | bytes += sizeof (ucs2_t); |
---|
108 | |
---|
109 | if (*outbytesleft < bytes) |
---|
110 | return (size_t)ICONV_CES_NOSPACE; |
---|
111 | |
---|
112 | cp = (ucs2_t *)*outbuf; |
---|
113 | |
---|
114 | if (*state == UTF16_SYSTEM_ENDIAN) |
---|
115 | { |
---|
116 | *cp++ = UTF16_BOM; |
---|
117 | *state |= UTF16_BOM_WRITTEN; |
---|
118 | } |
---|
119 | |
---|
120 | if (in < 0x00010000) |
---|
121 | { |
---|
122 | switch (*state) |
---|
123 | { |
---|
124 | case UTF16_LITTLE_ENDIAN: |
---|
125 | *cp = ICONV_HTOLES ((ucs2_t)in); |
---|
126 | break; |
---|
127 | case UTF16_BIG_ENDIAN: |
---|
128 | *cp = ICONV_HTOBES ((ucs2_t)in); |
---|
129 | break; |
---|
130 | case (UTF16_SYSTEM_ENDIAN | UTF16_BOM_WRITTEN): |
---|
131 | *cp = (ucs2_t)in; |
---|
132 | break; |
---|
133 | } |
---|
134 | } |
---|
135 | else |
---|
136 | { |
---|
137 | ucs2_t w1, w2; |
---|
138 | |
---|
139 | /* Process surrogate pair */ |
---|
140 | in -= 0x00010000; |
---|
141 | w1 = ((ucs2_t)((in >> 10)) & 0x03FF) | 0xD800; |
---|
142 | w2 = (ucs2_t)(in & 0x000003FF) | 0xDC00; |
---|
143 | |
---|
144 | switch (*state) |
---|
145 | { |
---|
146 | case UTF16_LITTLE_ENDIAN: |
---|
147 | *cp++ = ICONV_HTOLES (w1); |
---|
148 | *cp = ICONV_HTOLES (w2); |
---|
149 | break; |
---|
150 | case UTF16_BIG_ENDIAN: |
---|
151 | *cp++ = ICONV_HTOBES (w1); |
---|
152 | *cp = ICONV_HTOBES (w2); |
---|
153 | break; |
---|
154 | case (UTF16_SYSTEM_ENDIAN | UTF16_BOM_WRITTEN): |
---|
155 | *cp++ = w1; |
---|
156 | *cp = w2; |
---|
157 | break; |
---|
158 | } |
---|
159 | } |
---|
160 | |
---|
161 | *outbuf += bytes; |
---|
162 | *outbytesleft -= bytes; |
---|
163 | |
---|
164 | return bytes; |
---|
165 | } |
---|
166 | #endif /* ICONV_FROM_UCS_CES_UTF_16 */ |
---|
167 | |
---|
168 | #if defined (ICONV_TO_UCS_CES_UTF_16) |
---|
169 | static void * |
---|
170 | utf_16_init_to_ucs (struct _reent *rptr, |
---|
171 | const char *encoding) |
---|
172 | { |
---|
173 | int *data; |
---|
174 | |
---|
175 | if ((data = (int *)_malloc_r (rptr, sizeof (int))) == NULL) |
---|
176 | return (void *)NULL; |
---|
177 | |
---|
178 | if (strcmp (encoding, UTF_16BE) == 0) |
---|
179 | *data = UTF16_BIG_ENDIAN; |
---|
180 | else if (strcmp (encoding, UTF_16LE) == 0) |
---|
181 | *data = UTF16_LITTLE_ENDIAN; |
---|
182 | else |
---|
183 | *data = UTF16_UNDEFINED; |
---|
184 | |
---|
185 | return (void *)data; |
---|
186 | } |
---|
187 | |
---|
188 | static ucs4_t |
---|
189 | utf_16_convert_to_ucs (void *data, |
---|
190 | const unsigned char **inbuf, |
---|
191 | size_t *inbytesleft) |
---|
192 | { |
---|
193 | register ucs2_t w1; |
---|
194 | register ucs2_t w2; |
---|
195 | register ucs2_t *cp; |
---|
196 | int *state; |
---|
197 | ucs4_t res; |
---|
198 | int bytes = sizeof (ucs2_t); |
---|
199 | |
---|
200 | if (*inbytesleft < bytes) |
---|
201 | return (ucs4_t)ICONV_CES_BAD_SEQUENCE; |
---|
202 | |
---|
203 | state = (int *)data; |
---|
204 | cp = ((ucs2_t *)*inbuf); |
---|
205 | |
---|
206 | if (*state == UTF16_UNDEFINED) |
---|
207 | { |
---|
208 | if (*cp == ICONV_HTOLES(UTF16_BOM)) |
---|
209 | *state = UTF16_LITTLE_ENDIAN; |
---|
210 | else |
---|
211 | *state = UTF16_BIG_ENDIAN; |
---|
212 | |
---|
213 | if ( *cp == ICONV_HTOBES (UTF16_BOM) |
---|
214 | || *cp == ICONV_HTOLES (UTF16_BOM)) |
---|
215 | { |
---|
216 | if (*inbytesleft < (bytes += sizeof (ucs2_t))) |
---|
217 | return (ucs4_t)ICONV_CES_BAD_SEQUENCE; |
---|
218 | cp += 1; |
---|
219 | } |
---|
220 | } |
---|
221 | |
---|
222 | if (*state == UTF16_LITTLE_ENDIAN) |
---|
223 | w1 = ICONV_LETOHS (*cp); |
---|
224 | else |
---|
225 | w1 = ICONV_BETOHS (*cp); |
---|
226 | |
---|
227 | if (w1 < 0xD800 || w1 > 0xDFFF) |
---|
228 | { |
---|
229 | if (w1 == 0xFFFF || w1 == 0xFFFE) |
---|
230 | return (ucs4_t)ICONV_CES_INVALID_CHARACTER; |
---|
231 | res = (ucs4_t)w1; |
---|
232 | } |
---|
233 | else |
---|
234 | { |
---|
235 | /* Process surrogate pair */ |
---|
236 | if (*inbytesleft < (bytes += 2)) |
---|
237 | return (ucs4_t)ICONV_CES_BAD_SEQUENCE; |
---|
238 | |
---|
239 | if (w1 > 0xDBFF) |
---|
240 | /* Broken surrogate character */ |
---|
241 | return (ucs4_t)ICONV_CES_INVALID_CHARACTER; |
---|
242 | |
---|
243 | cp += 1; |
---|
244 | |
---|
245 | if (*state == UTF16_LITTLE_ENDIAN) |
---|
246 | w2 = ICONV_LETOHS (*cp); |
---|
247 | else |
---|
248 | w2 = ICONV_BETOHS (*cp); |
---|
249 | |
---|
250 | if (w2 < 0xDC00 || w2 > 0xDFFF) |
---|
251 | /* Broken surrogate character */ |
---|
252 | return (ucs4_t)ICONV_CES_INVALID_CHARACTER; |
---|
253 | |
---|
254 | res = (ucs4_t)(w2 & 0x03FF) | ((ucs4_t)(w1 & 0x03FF) << 10); |
---|
255 | res += 0x00010000; |
---|
256 | } |
---|
257 | |
---|
258 | *inbuf += bytes; |
---|
259 | *inbytesleft -= bytes; |
---|
260 | |
---|
261 | return res; |
---|
262 | } |
---|
263 | #endif /* ICONV_TO_UCS_CES_UTF_16 */ |
---|
264 | |
---|
/*
 * utf_16_get_mb_cur_max - upper bound on the bytes used by one character.
 *
 * data: converter state; not consulted.
 *
 * Always returns 6 (presumably a BOM plus a surrogate pair of 2-byte code
 * units, the most the from-UCS converter in this file emits per call).
 */
static int
utf_16_get_mb_cur_max (void *data)
{
  enum { UTF16_MAX_BYTES_PER_CHAR = 6 };

  (void)data;

  return UTF16_MAX_BYTES_PER_CHAR;
}
---|
270 | |
---|
271 | #if defined (ICONV_TO_UCS_CES_UTF_16) |
---|
272 | const iconv_to_ucs_ces_handlers_t |
---|
273 | _iconv_to_ucs_ces_handlers_utf_16 = |
---|
274 | { |
---|
275 | utf_16_init_to_ucs, |
---|
276 | utf_16_close, |
---|
277 | utf_16_get_mb_cur_max, |
---|
278 | NULL, |
---|
279 | NULL, |
---|
280 | NULL, |
---|
281 | utf_16_convert_to_ucs |
---|
282 | }; |
---|
283 | #endif |
---|
284 | |
---|
285 | #if defined (ICONV_FROM_UCS_CES_UTF_16) |
---|
286 | const iconv_from_ucs_ces_handlers_t |
---|
287 | _iconv_from_ucs_ces_handlers_utf_16 = |
---|
288 | { |
---|
289 | utf_16_init_from_ucs, |
---|
290 | utf_16_close, |
---|
291 | utf_16_get_mb_cur_max, |
---|
292 | NULL, |
---|
293 | NULL, |
---|
294 | NULL, |
---|
295 | utf_16_convert_from_ucs |
---|
296 | }; |
---|
297 | #endif |
---|
298 | |
---|
299 | #endif /* ICONV_TO_UCS_CES_UTF_16 || ICONV_FROM_UCS_CES_UTF_16 */ |
---|
300 | |
---|