source: trunk/libs/newlib/src/newlib/libc/iconv/ces/utf-16.c @ 444

Last change on this file since 444 was 444, checked in by satin@…, 6 years ago

add newlib,libalmos-mkh, restructure shared_syscalls.h and mini-libc

File size: 7.6 KB
Line 
1/*
2 * Copyright (c) 2003-2004, Artem B. Bityuckiy
3 * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26#include "cesbi.h"
27
28#if defined (ICONV_TO_UCS_CES_UTF_16) \
29 || defined (ICONV_FROM_UCS_CES_UTF_16)
30
31#include <_ansi.h>
32#include <reent.h>
33#include <sys/types.h>
34#include <stdlib.h>
35#include <string.h>
36#include <wchar.h>
37#include "../lib/local.h"
38#include "../lib/ucsconv.h"
39#include "../lib/endian.h"
40
/*
 * On input, the UTF-16 converter interprets a leading BOM and assumes Big
 * Endian byte order if the BOM is absent. On output, the UTF-16 converter
 * writes in System Endian byte order and emits the corresponding BOM as the
 * first code. The UTF-16LE and UTF-16BE converters ignore the BOM on input
 * and do not output a BOM.
 */
47
48#define UTF16_UNDEFINED     0x00
49#define UTF16_BIG_ENDIAN    0x01
50#define UTF16_LITTLE_ENDIAN 0x02
51#define UTF16_SYSTEM_ENDIAN 0x04
52#define UTF16_BOM_WRITTEN   0x08
53
54#define UTF16_BOM 0xFEFF
55
56#define UTF_16   "utf_16"
57#define UTF_16BE "utf_16be"
58#define UTF_16LE "utf_16le"
59
/*
 * Close handler: hands DATA back to the reentrant allocator.
 *
 * rptr - reentrancy structure passed through to _free_r.
 * data - converter state to release (presumably the block returned by one
 *        of the init handlers in this file; verify against the caller).
 *
 * Always returns 0.
 */
static size_t
utf_16_close (struct _reent *rptr,
              void *data)
{
  size_t status = 0;

  _free_r (rptr, data);

  return status;
}
67
68#if defined (ICONV_FROM_UCS_CES_UTF_16)
69static void *
70utf_16_init_from_ucs (struct _reent *rptr,
71                             const char *encoding)
72{
73  int *data;
74 
75  if ((data = (int *)_malloc_r (rptr, sizeof (int))) == NULL)
76    return (void *)NULL;
77 
78  if (strcmp (encoding, UTF_16LE) == 0)
79    *data = UTF16_LITTLE_ENDIAN;
80  else if (strcmp (encoding, UTF_16BE) == 0)
81    *data = UTF16_BIG_ENDIAN;
82  else
83    *data = UTF16_SYSTEM_ENDIAN;
84     
85  return (void *)data;
86}
87
88static size_t
89utf_16_convert_from_ucs (void *data,
90                                register ucs4_t in,
91                                unsigned char **outbuf,
92                                size_t *outbytesleft)
93{
94  register ucs2_t *cp;
95  register size_t bytes;
96  register int *state;
97 
98  if (in > 0x0010FFFF || (in >= 0x0000D800 && in <= 0x0000DFFF)
99      || in == 0x0000FFFF || in == 0x0000FFFE)
100    return (size_t)ICONV_CES_INVALID_CHARACTER; 
101
102  state = (int *)data;
103  bytes = (*state == UTF16_SYSTEM_ENDIAN) ? sizeof (ucs2_t) * 2 
104                                          : sizeof (ucs2_t);
105
106  if (in > 0x0000FFFF)
107    bytes += sizeof (ucs2_t);
108
109  if (*outbytesleft < bytes)
110    return (size_t)ICONV_CES_NOSPACE;
111
112  cp = (ucs2_t *)*outbuf;
113
114  if (*state == UTF16_SYSTEM_ENDIAN)
115    {
116      *cp++ = UTF16_BOM;
117      *state |= UTF16_BOM_WRITTEN;
118    }
119
120  if (in < 0x00010000)
121    {
122      switch (*state)
123        {
124          case UTF16_LITTLE_ENDIAN:
125            *cp = ICONV_HTOLES ((ucs2_t)in);
126            break;
127          case UTF16_BIG_ENDIAN:
128            *cp = ICONV_HTOBES ((ucs2_t)in);
129            break;
130          case (UTF16_SYSTEM_ENDIAN | UTF16_BOM_WRITTEN):
131            *cp = (ucs2_t)in;
132            break;
133        }
134    }
135  else
136    {
137      ucs2_t w1, w2;
138     
139      /* Process surrogate pair */
140      in -= 0x00010000;
141      w1 = ((ucs2_t)((in >> 10)) & 0x03FF) | 0xD800;
142      w2 = (ucs2_t)(in & 0x000003FF) | 0xDC00;
143
144      switch (*state)
145        {
146          case UTF16_LITTLE_ENDIAN:
147            *cp++ = ICONV_HTOLES (w1);
148            *cp = ICONV_HTOLES (w2);
149            break;
150          case UTF16_BIG_ENDIAN:
151            *cp++ = ICONV_HTOBES (w1);
152            *cp = ICONV_HTOBES (w2);
153            break;
154          case (UTF16_SYSTEM_ENDIAN | UTF16_BOM_WRITTEN):
155            *cp++ = w1;
156            *cp = w2;
157            break;
158        }
159    }
160 
161  *outbuf += bytes;
162  *outbytesleft -= bytes;
163
164  return bytes;
165}
166#endif /* ICONV_FROM_UCS_CES_UTF_16 */
167
168#if defined (ICONV_TO_UCS_CES_UTF_16)
169static void *
170utf_16_init_to_ucs (struct _reent *rptr,
171                           const char *encoding)
172{
173  int *data;
174 
175  if ((data = (int *)_malloc_r (rptr, sizeof (int))) == NULL)
176    return (void *)NULL;
177 
178  if (strcmp (encoding, UTF_16BE) == 0)
179    *data = UTF16_BIG_ENDIAN;
180  else if (strcmp (encoding, UTF_16LE) == 0)
181    *data = UTF16_LITTLE_ENDIAN;
182  else
183    *data = UTF16_UNDEFINED;
184     
185  return (void *)data;
186}
187
188static ucs4_t
189utf_16_convert_to_ucs (void *data,
190                              const unsigned char **inbuf,
191                              size_t *inbytesleft)
192{
193  register ucs2_t w1;
194  register ucs2_t w2;
195  register ucs2_t *cp;
196  int *state;
197  ucs4_t res;
198  int bytes = sizeof (ucs2_t);
199
200  if (*inbytesleft < bytes)
201    return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
202 
203  state = (int *)data;
204  cp = ((ucs2_t *)*inbuf);
205
206  if (*state == UTF16_UNDEFINED)
207    {
208      if (*cp == ICONV_HTOLES(UTF16_BOM))
209        *state = UTF16_LITTLE_ENDIAN;
210      else
211        *state = UTF16_BIG_ENDIAN;
212
213     if (   *cp == ICONV_HTOBES (UTF16_BOM)
214         || *cp == ICONV_HTOLES (UTF16_BOM))
215       {
216         if (*inbytesleft < (bytes += sizeof (ucs2_t)))
217           return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
218         cp += 1;
219       }
220    }
221   
222  if (*state == UTF16_LITTLE_ENDIAN)     
223    w1 = ICONV_LETOHS (*cp);
224  else
225    w1 = ICONV_BETOHS (*cp);
226
227  if (w1  < 0xD800 || w1 > 0xDFFF)
228    {
229      if (w1 == 0xFFFF || w1 == 0xFFFE)
230        return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
231      res = (ucs4_t)w1;
232    }
233  else
234    {
235      /* Process surrogate pair */
236      if (*inbytesleft < (bytes += 2))
237        return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
238   
239      if (w1 > 0xDBFF)
240        /* Broken surrogate character */
241        return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
242       
243      cp += 1;
244
245      if (*state == UTF16_LITTLE_ENDIAN)     
246        w2 = ICONV_LETOHS (*cp);
247      else
248        w2 = ICONV_BETOHS (*cp);
249 
250      if (w2 < 0xDC00 || w2 > 0xDFFF)
251        /* Broken surrogate character */
252        return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
253   
254      res = (ucs4_t)(w2 & 0x03FF) | ((ucs4_t)(w1 & 0x03FF) << 10);
255      res += 0x00010000;
256    }
257
258  *inbuf += bytes;
259  *inbytesleft -= bytes;
260 
261  return res;
262}
263#endif /* ICONV_TO_UCS_CES_UTF_16 */
264
/*
 * Report the maximum number of bytes one character can occupy in this
 * encoding.  The answer is a constant; DATA is not consulted.
 *
 * NOTE(review): 6 presumably covers a BOM plus a surrogate pair, matching
 * the largest write in utf_16_convert_from_ucs - confirm.
 */
static int
utf_16_get_mb_cur_max (void *data)
{
  enum { utf_16_mb_cur_max = 6 };

  (void)data;

  return utf_16_mb_cur_max;
}
270
271#if defined (ICONV_TO_UCS_CES_UTF_16)
272const iconv_to_ucs_ces_handlers_t
273_iconv_to_ucs_ces_handlers_utf_16 = 
274{
275  utf_16_init_to_ucs,
276  utf_16_close,
277  utf_16_get_mb_cur_max,
278  NULL,
279  NULL,
280  NULL,
281  utf_16_convert_to_ucs
282};
283#endif
284
285#if defined (ICONV_FROM_UCS_CES_UTF_16)
286const iconv_from_ucs_ces_handlers_t
287_iconv_from_ucs_ces_handlers_utf_16 =
288{
289  utf_16_init_from_ucs,
290  utf_16_close,
291  utf_16_get_mb_cur_max,
292  NULL,
293  NULL,
294  NULL,
295  utf_16_convert_from_ucs
296};
297#endif
298
299#endif /* ICONV_TO_UCS_CES_UTF_16 || ICONV_FROM_UCS_CES_UTF_16 */
300
Note: See TracBrowser for help on using the repository browser.