1 | /* |
---|
2 | * Copyright (c) 2003-2004, Artem B. Bityuckiy |
---|
3 | * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved. |
---|
4 | * |
---|
5 | * Redistribution and use in source and binary forms, with or without |
---|
6 | * modification, are permitted provided that the following conditions |
---|
7 | * are met: |
---|
8 | * 1. Redistributions of source code must retain the above copyright |
---|
9 | * notice, this list of conditions and the following disclaimer. |
---|
10 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
11 | * notice, this list of conditions and the following disclaimer in the |
---|
12 | * documentation and/or other materials provided with the distribution. |
---|
13 | * |
---|
14 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
---|
15 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
---|
18 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
19 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
20 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
21 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
22 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
23 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
24 | * SUCH DAMAGE. |
---|
25 | */ |
---|
26 | #include "cesbi.h" |
---|
27 | |
---|
28 | #if defined (ICONV_TO_UCS_CES_UTF_8) \ |
---|
29 | || defined (ICONV_FROM_UCS_CES_UTF_8) |
---|
30 | |
---|
31 | #include <_ansi.h> |
---|
32 | #include <reent.h> |
---|
33 | #include <sys/types.h> |
---|
34 | #include "../lib/local.h" |
---|
35 | #include "../lib/ucsconv.h" |
---|
36 | |
---|
37 | #define UTF8_MB_CUR_MAX 6 |
---|
38 | |
---|
/*
 * The UTF-8 CES converter does not interpret the BOM. It rejects overlong
 * sequences, the codes U+FFFF and U+FFFE, UTF-16 surrogate codes and all
 * codes > 0x7FFFFFFF.
 */
---|
43 | |
---|
44 | #if defined (ICONV_FROM_UCS_CES_UTF_8) |
---|
45 | static size_t |
---|
46 | convert_from_ucs (void *data, |
---|
47 | register ucs4_t in, |
---|
48 | unsigned char **outbuf, |
---|
49 | size_t *outbytesleft) |
---|
50 | { |
---|
51 | register unsigned char *cp; |
---|
52 | register size_t bytes; |
---|
53 | |
---|
54 | if ((in >= 0x0000D800 && in <= 0x0000DFFF) |
---|
55 | || in > 0x7FFFFFFF || in == 0x0000FFFF || in == 0x0000FFFE) |
---|
56 | return (size_t)ICONV_CES_INVALID_CHARACTER; |
---|
57 | |
---|
58 | if (in < 0x80) |
---|
59 | bytes = 1; |
---|
60 | else if (in < 0x800) |
---|
61 | bytes = 2; |
---|
62 | else if (in < 0x10000) |
---|
63 | bytes = 3; |
---|
64 | else if (in < 0x200000) |
---|
65 | bytes = 4; |
---|
66 | else if (in < 0x4000000) |
---|
67 | bytes = 5; |
---|
68 | else |
---|
69 | bytes = 6; |
---|
70 | |
---|
71 | if (*outbytesleft < bytes) |
---|
72 | return (size_t)ICONV_CES_NOSPACE; |
---|
73 | |
---|
74 | cp = *outbuf; |
---|
75 | |
---|
76 | switch (bytes) |
---|
77 | { |
---|
78 | case 1: |
---|
79 | *cp = (unsigned char)in; |
---|
80 | break; |
---|
81 | |
---|
82 | case 2: |
---|
83 | *cp++ = (unsigned char)((in >> 6) | 0x000000C0); |
---|
84 | *cp++ = (unsigned char)((in & 0x0000003F) | 0x00000080); |
---|
85 | break; |
---|
86 | |
---|
87 | case 3: |
---|
88 | *cp++ = (unsigned char)((in >> 12) | 0x000000E0); |
---|
89 | *cp++ = (unsigned char)(((in >> 6) & 0x0000003F) | 0x00000080); |
---|
90 | *cp++ = (unsigned char)((in & 0x0000003F) | 0x00000080); |
---|
91 | break; |
---|
92 | |
---|
93 | case 4: |
---|
94 | *cp++ = (unsigned char)((in >> 18) | 0x000000F0); |
---|
95 | *cp++ = (unsigned char)(((in >> 12) & 0x0000003F) | 0x00000080); |
---|
96 | *cp++ = (unsigned char)(((in >> 6) & 0x0000003F) | 0x00000080); |
---|
97 | *cp++ = (unsigned char)((in & 0x0000003F) | 0x00000080); |
---|
98 | break; |
---|
99 | |
---|
100 | case 5: |
---|
101 | *cp++ = (unsigned char)((in >> 24) | 0x000000F8); |
---|
102 | *cp++ = (unsigned char)(((in >> 18) & 0x0000003F) | 0x00000080); |
---|
103 | *cp++ = (unsigned char)(((in >> 12) & 0x0000003F) | 0x00000080); |
---|
104 | *cp++ = (unsigned char)(((in >> 6) & 0x0000003F) | 0x00000080); |
---|
105 | *cp++ = (unsigned char)((in & 0x0000003F) | 0x00000080); |
---|
106 | break; |
---|
107 | |
---|
108 | case 6: |
---|
109 | *cp++ = (unsigned char)((in >> 30) | 0x000000FC); |
---|
110 | *cp++ = (unsigned char)(((in >> 24) & 0x0000003F) | 0x00000080); |
---|
111 | *cp++ = (unsigned char)(((in >> 18) & 0x0000003F) | 0x00000080); |
---|
112 | *cp++ = (unsigned char)(((in >> 12) & 0x0000003F) | 0x00000080); |
---|
113 | *cp++ = (unsigned char)(((in >> 6) & 0x0000003F) | 0x00000080); |
---|
114 | *cp++ = (unsigned char)((in & 0x0000003F) | 0x00000080); |
---|
115 | break; |
---|
116 | } |
---|
117 | |
---|
118 | *outbytesleft -= bytes; |
---|
119 | *outbuf += bytes; |
---|
120 | |
---|
121 | return bytes; |
---|
122 | } |
---|
123 | #endif /* ICONV_FROM_UCS_CES_UTF_8 */ |
---|
124 | |
---|
125 | #if defined (ICONV_TO_UCS_CES_UTF_8) |
---|
126 | static ucs4_t |
---|
127 | convert_to_ucs (void *data, |
---|
128 | const unsigned char **inbuf, |
---|
129 | size_t *inbytesleft) |
---|
130 | { |
---|
131 | register const unsigned char *in = *inbuf; |
---|
132 | register size_t bytes; |
---|
133 | ucs4_t res; |
---|
134 | |
---|
135 | if (in[0] >= 0xC0) |
---|
136 | { |
---|
137 | if (in[0] < 0xE0) |
---|
138 | { |
---|
139 | if (*inbytesleft < (bytes = 2)) |
---|
140 | return (ucs4_t)ICONV_CES_BAD_SEQUENCE; |
---|
141 | |
---|
142 | if ( ((in[0] & ~0x1F) == 0xC0) |
---|
143 | && ((in[1] & 0xC0) == 0x80)) |
---|
144 | res = ((ucs4_t)(in[0] & 0x1F) << 6) |
---|
145 | | ((ucs4_t)(in[1] & 0x3F)); |
---|
146 | else |
---|
147 | return (ucs4_t)ICONV_CES_INVALID_CHARACTER; |
---|
148 | |
---|
149 | if (res < 0x00000080) /* Overlong sequence */ |
---|
150 | return (ucs4_t)ICONV_CES_INVALID_CHARACTER; |
---|
151 | } |
---|
152 | |
---|
153 | else if (in[0] < 0xF0) |
---|
154 | { |
---|
155 | if (*inbytesleft < (bytes = 3)) |
---|
156 | return (ucs4_t)ICONV_CES_BAD_SEQUENCE; |
---|
157 | |
---|
158 | if ( ((in[0] & ~0x0F) == 0xE0) |
---|
159 | && ((in[1] & 0xC0) == 0x80) |
---|
160 | && ((in[2] & 0xC0) == 0x80)) |
---|
161 | res = ((ucs4_t)(in[0] & 0x0F) << 12) |
---|
162 | | ((ucs4_t)(in[1] & 0x3F) << 6) |
---|
163 | | ((ucs4_t)(in[2] & 0x3F)); |
---|
164 | else |
---|
165 | return (ucs4_t)ICONV_CES_INVALID_CHARACTER; |
---|
166 | |
---|
167 | if (res < 0x00000800) /* Overlong sequence */ |
---|
168 | return (ucs4_t)ICONV_CES_INVALID_CHARACTER; |
---|
169 | } |
---|
170 | |
---|
171 | else if (in[0] < 0xF8) |
---|
172 | { |
---|
173 | if (*inbytesleft < (bytes = 4)) |
---|
174 | return (ucs4_t)ICONV_CES_BAD_SEQUENCE; |
---|
175 | |
---|
176 | if ( ((in[0] & ~0x07) == 0xF0) |
---|
177 | && ((in[1] & 0xC0) == 0x80) |
---|
178 | && ((in[2] & 0xC0) == 0x80) |
---|
179 | && ((in[3] & 0xC0) == 0x80)) |
---|
180 | res = ((ucs4_t)(in[0] & 0x07) << 18) |
---|
181 | | ((ucs4_t)(in[1] & 0x3F) << 12) |
---|
182 | | ((ucs4_t)(in[2] & 0x3F) << 6) |
---|
183 | | ((ucs4_t)(in[3] & 0x3F)); |
---|
184 | else |
---|
185 | return (ucs4_t)ICONV_CES_INVALID_CHARACTER; |
---|
186 | |
---|
187 | if (res < 0x00010000) /* Overlong sequence */ |
---|
188 | return (ucs4_t)ICONV_CES_INVALID_CHARACTER; |
---|
189 | } |
---|
190 | |
---|
191 | else if (in[0] < 0xFC) |
---|
192 | { |
---|
193 | if (*inbytesleft < (bytes = 5)) |
---|
194 | return (ucs4_t)ICONV_CES_BAD_SEQUENCE; |
---|
195 | |
---|
196 | if ( ((in[0] & ~0x03) == 0xF8) |
---|
197 | && ((in[1] & 0xC0) == 0x80) |
---|
198 | && ((in[2] & 0xC0) == 0x80) |
---|
199 | && ((in[3] & 0xC0) == 0x80) |
---|
200 | && ((in[4] & 0xC0) == 0x80)) |
---|
201 | res = ((ucs4_t)(in[0] & 0x03) << 24) |
---|
202 | | ((ucs4_t)(in[1] & 0x3F) << 18) |
---|
203 | | ((ucs4_t)(in[2] & 0x3F) << 12) |
---|
204 | | ((ucs4_t)(in[3] & 0x3F) << 6) |
---|
205 | | ((ucs4_t)(in[4] & 0x3F)); |
---|
206 | else |
---|
207 | return (ucs4_t)ICONV_CES_INVALID_CHARACTER; |
---|
208 | |
---|
209 | if (res < 0x00200000) /* Overlong sequence */ |
---|
210 | return (ucs4_t)ICONV_CES_INVALID_CHARACTER; |
---|
211 | } |
---|
212 | |
---|
213 | else if (in[0] <= 0xFD) |
---|
214 | { |
---|
215 | if (*inbytesleft < (bytes = 6)) |
---|
216 | return (ucs4_t)ICONV_CES_BAD_SEQUENCE; |
---|
217 | |
---|
218 | if ( ((in[0] & ~0x01) == 0xFC) |
---|
219 | && ((in[1] & 0xC0) == 0x80) |
---|
220 | && ((in[2] & 0xC0) == 0x80) |
---|
221 | && ((in[3] & 0xC0) == 0x80) |
---|
222 | && ((in[4] & 0xC0) == 0x80) |
---|
223 | && ((in[5] & 0xC0) == 0x80)) |
---|
224 | res = ((ucs4_t)(in[0] & 0x1) << 30) |
---|
225 | | ((ucs4_t)(in[1] & 0x3F) << 24) |
---|
226 | | ((ucs4_t)(in[2] & 0x3F) << 18) |
---|
227 | | ((ucs4_t)(in[3] & 0x3F) << 12) |
---|
228 | | ((ucs4_t)(in[4] & 0x3F) << 6) |
---|
229 | | ((ucs4_t)(in[5] & 0x3F)); |
---|
230 | else |
---|
231 | return (ucs4_t)ICONV_CES_INVALID_CHARACTER; |
---|
232 | |
---|
233 | if (res < 0x04000000) /* Overlong sequence */ |
---|
234 | return (ucs4_t)ICONV_CES_INVALID_CHARACTER; |
---|
235 | } |
---|
236 | |
---|
237 | else |
---|
238 | return (ucs4_t)ICONV_CES_INVALID_CHARACTER; |
---|
239 | } |
---|
240 | else if (in[0] & 0x80) |
---|
241 | return (ucs4_t)ICONV_CES_INVALID_CHARACTER; |
---|
242 | else |
---|
243 | { |
---|
244 | res = (ucs4_t)in[0]; |
---|
245 | bytes = 1; |
---|
246 | } |
---|
247 | |
---|
248 | if ( (res >= 0x0000D800 && res <= 0x0000DFFF) |
---|
249 | || res > 0x7FFFFFFF || res == 0x0000FFFF || res == 0x0000FFFE) |
---|
250 | return (ucs4_t)ICONV_CES_INVALID_CHARACTER; |
---|
251 | |
---|
252 | *inbytesleft -= bytes; |
---|
253 | *inbuf += bytes; |
---|
254 | |
---|
255 | return res; |
---|
256 | } |
---|
257 | #endif /* ICONV_TO_UCS_CES_UTF_8 */ |
---|
258 | |
---|
259 | static int |
---|
260 | get_mb_cur_max (void *data) |
---|
261 | { |
---|
262 | return UTF8_MB_CUR_MAX; |
---|
263 | } |
---|
264 | |
---|
265 | #if defined (ICONV_TO_UCS_CES_UTF_8) |
---|
266 | const iconv_to_ucs_ces_handlers_t |
---|
267 | _iconv_to_ucs_ces_handlers_utf_8 = |
---|
268 | { |
---|
269 | NULL, |
---|
270 | NULL, |
---|
271 | get_mb_cur_max, |
---|
272 | NULL, |
---|
273 | NULL, |
---|
274 | NULL, |
---|
275 | convert_to_ucs |
---|
276 | }; |
---|
277 | #endif |
---|
278 | |
---|
279 | #if defined (ICONV_FROM_UCS_CES_UTF_8) |
---|
280 | const iconv_from_ucs_ces_handlers_t |
---|
281 | _iconv_from_ucs_ces_handlers_utf_8 = |
---|
282 | { |
---|
283 | NULL, |
---|
284 | NULL, |
---|
285 | get_mb_cur_max, |
---|
286 | NULL, |
---|
287 | NULL, |
---|
288 | NULL, |
---|
289 | convert_from_ucs |
---|
290 | }; |
---|
291 | #endif |
---|
292 | |
---|
293 | #endif /* ICONV_TO_UCS_CES_UTF_8 || ICONV_FROM_UCS_CES_UTF_8 */ |
---|
294 | |
---|