source: trunk/libs/newlib/src/newlib/libc/locale/locale.c @ 444

Last change on this file since 444 was 444, checked in by satin@…, 6 years ago

add newlib,libalmos-mkh, restructure shared_syscalls.h and mini-libc

File size: 27.3 KB
Line 
1/*
2FUNCTION
3<<setlocale>>, <<localeconv>>---select or query locale
4
5INDEX
6        setlocale
7INDEX
8        localeconv
9INDEX
10        _setlocale_r
11INDEX
12        _localeconv_r
13
14SYNOPSIS
15        #include <locale.h>
16        char *setlocale(int <[category]>, const char *<[locale]>);
17        lconv *localeconv(void);
18
19        char *_setlocale_r(void *<[reent]>,
20                        int <[category]>, const char *<[locale]>);
21        lconv *_localeconv_r(void *<[reent]>);
22
23DESCRIPTION
24<<setlocale>> is the facility defined by ANSI C to condition the
25execution environment for international collating and formatting
26information; <<localeconv>> reports on the settings of the current
27locale.
28
29This is a minimal implementation, supporting only the required <<"POSIX">>
30and <<"C">> values for <[locale]>; strings representing other locales are not
31honored unless _MB_CAPABLE is defined.
32
33If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
34the form
35
36  language[_TERRITORY][.charset][@@modifier]
37
38<<"language">> is a two character string per ISO 639, or, if not available
39for a given language, a three character string per ISO 639-3.
40<<"TERRITORY">> is a country code per ISO 3166.  For <<"charset">> and
41<<"modifier">> see below.
42
43In addition to the POSIX specifier, the following extension is supported
44for backward compatibility with older implementations using newlib:
45<<"C-charset">>.
46Instead of <<"C-">>, you can also specify <<"C.">>.  Both variations allow
47specifying language-neutral locales that use charsets other than ASCII,
48for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
49but uses the UTF-8 charset.
50
51The following charsets are recognized:
52<<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
53<<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">> with
541 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855,
55857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256,
561257, 1258].
57
58Charsets are case insensitive.  For instance, <<"EUCJP">> and <<"eucJP">>
59are equivalent.  Charset names with dashes can also be written without
60dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>.  <<"EUCJP">> and
61<<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>.
62
63Full support for all of the above charsets requires that newlib has been
64built with multibyte support and support for all ISO and Windows codepages.
65Otherwise all singlebyte charsets are simply mapped to ASCII.  Right now,
66only newlib for Cygwin is built with full charset support by default.
67Under Cygwin, this implementation additionally supports the charsets
68<<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and <<"Big5">>.  Cygwin
69does not support <<"JIS">>.
70
71Cygwin additionally supports locales from the file
72/usr/share/locale/locale.alias.
73
74(<<"">> is also accepted; if given, the settings are read from the
75corresponding LC_* environment variables and $LANG according to POSIX rules.)
76
77This implementation also supports the modifier <<"cjknarrow">>, which
78affects how the functions <<wcwidth>> and <<wcswidth>> handle characters
79from the "CJK Ambiguous Width" category of characters described at
80http://www.unicode.org/reports/tr11/#Ambiguous. These characters have a width
81of 1 for singlebyte charsets and a width of 2 for multibyte charsets
82other than UTF-8. For UTF-8, their width depends on the language specifier:
83it is 2 for <<"zh">> (Chinese), <<"ja">> (Japanese), and <<"ko">> (Korean),
84and 1 for everything else. Specifying <<"cjknarrow">> forces a width of 1,
85independent of charset and language.
86
87If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
88pointer to the string representing the current locale.  The acceptable
89values for <[category]> are defined in `<<locale.h>>' as macros
90beginning with <<"LC_">>.
91
92<<localeconv>> returns a pointer to a structure (also defined in
93`<<locale.h>>') describing the locale-specific conventions currently
94in effect. 
95
96<<_localeconv_r>> and <<_setlocale_r>> are reentrant versions of
97<<localeconv>> and <<setlocale>> respectively.  The extra argument
98<[reent]> is a pointer to a reentrancy structure.
99
100RETURNS
101A successful call to <<setlocale>> returns a pointer to a string
102associated with the specified category for the new locale.  The string
103returned by <<setlocale>> is such that a subsequent call using that
104string will restore that category (or all categories in case of LC_ALL),
105to that state.  The application shall not modify the string returned
106which may be overwritten by a subsequent call to <<setlocale>>.
107On error, <<setlocale>> returns <<NULL>>.
108
109<<localeconv>> returns a pointer to a structure of type <<lconv>>,
110which describes the formatting and collating conventions in effect (in
111this implementation, always those of the C locale).
112
113PORTABILITY
114ANSI C requires <<setlocale>>, but the only locale required across all
115implementations is the C locale.
116
117NOTES
118There is no ISO-8859-12 codepage.  It's also refused by this implementation.
119
120No supporting OS subroutines are required.
121*/
122
123/* Parts of this code are originally taken from FreeBSD. */
124/*
125 * Copyright (c) 1996 - 2002 FreeBSD Project
126 * Copyright (c) 1991, 1993
127 *      The Regents of the University of California.  All rights reserved.
128 *
129 * This code is derived from software contributed to Berkeley by
130 * Paul Borman at Krystal Technologies.
131 *
132 * Redistribution and use in source and binary forms, with or without
133 * modification, are permitted provided that the following conditions
134 * are met:
135 * 1. Redistributions of source code must retain the above copyright
136 *    notice, this list of conditions and the following disclaimer.
137 * 2. Redistributions in binary form must reproduce the above copyright
138 *    notice, this list of conditions and the following disclaimer in the
139 *    documentation and/or other materials provided with the distribution.
140 * 4. Neither the name of the University nor the names of its contributors
141 *    may be used to endorse or promote products derived from this software
142 *    without specific prior written permission.
143 *
144 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
145 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
146 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
147 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
148 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
149 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
150 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
151 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
152 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
153 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
154 * SUCH DAMAGE.
155 */
156
157#include <newlib.h>
158#include <errno.h>
159#include <string.h>
160#include <limits.h>
161#include <reent.h>
162#include <stdlib.h>
163#include <wchar.h>
164#include "setlocale.h"
165#include "../ctype/ctype_.h"
166#include "../stdlib/local.h"
167
#ifdef __CYGWIN__
/* Legacy exported symbol.  It has to be kept available for backward
   compatibility; applications compiled after 2010 don't use it anymore.
   This file only ever writes it: __loadlocale stores the new maximum
   multibyte length here whenever LC_CTYPE is changed. */
int __EXPORT __mb_cur_max = 6;
#endif
174
/* NOTE(review): not referenced anywhere else in this file.  Presumably the
   locale data search path inherited from the FreeBSD code this file is
   derived from -- confirm against the other locale sources. */
char *_PathLocale = NULL;
176
#ifdef _MB_CAPABLE
/*
 * Environment variable names for getenv(), indexed by locale category.
 * Slot 0 is LC_ALL; __get_locale_env looks up categories[0] first and
 * then categories[category].
 *
 * The table and the strings it points to are never modified, so both
 * levels are const-qualified.
 */
static const char *const categories[_LC_LAST] = {
  "LC_ALL",
  "LC_COLLATE",
  "LC_CTYPE",
  "LC_MONETARY",
  "LC_NUMERIC",
  "LC_TIME",
  "LC_MESSAGES",
};
#endif /* _MB_CAPABLE */
191
/*
 * Default locale per POSIX.  Can be overridden on a per-target basis by
 * defining DEFAULT_LOCALE before this point is reached.
 */
#ifndef DEFAULT_LOCALE
#define DEFAULT_LOCALE  "C"
#endif
198
#ifdef _MB_CAPABLE
/*
 * This variable can be changed by any outside mechanism.  This makes it
 * possible, for instance, to load the default locale from a file.
 * __get_locale_env falls back to this string when none of LC_ALL, the
 * category-specific LC_* variable and LANG is set to a non-empty value.
 */
char __default_locale[ENCODING_LEN + 1] = DEFAULT_LOCALE;

/*
 * Constant description of the plain "C" locale: every category is named
 * "C" and the ASCII conversion functions are used.
 *
 * NOTE(review): struct __locale_t is declared outside this file, so the
 * initializer below is purely positional.  The per-field notes are
 * inferred from how __loadlocale fills in such a structure -- verify them
 * against the struct declaration before reordering anything.
 */
const struct __locale_t __C_locale =
{
  /* Locale name of each category, LC_ALL first. */
  { "C", "C", "C", "C", "C", "C", "C", },
  /* Wide-char <-> multibyte converters (presumably wctomb, then mbtowc). */
  __ascii_wctomb,
  __ascii_mbtowc,
  /* Presumably cjk_lang: no double-width CJK ambiguous characters. */
  0,
  /* Presumably ctype_ptr: the default character classification table. */
  DEFAULT_CTYPE_PTR,
  /* Presumably the lconv defaults: "." as decimal point, empty strings,
     and CHAR_MAX for every unspecified numeric field -- confirm. */
  {
    ".", "", "", "", "", "", "", "", "", "",
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX
  },
#ifndef __HAVE_LOCALE_INFO__
  /* Presumably mb_cur_max (1 byte per character), ctype_codeset and
     message_codeset, the fields __loadlocale writes in this
     configuration. */
  "\1",
  "ASCII",
  "ASCII",
#else /* __HAVE_LOCALE_INFO__ */
  /* Per-category locale data; each entry is a pair whose second member
     is left NULL here. */
  {
    { NULL, NULL },                     /* LC_ALL */
#ifdef __CYGWIN__
    { &_C_collate_locale, NULL },       /* LC_COLLATE */
#else
    { NULL, NULL },                     /* LC_COLLATE */
#endif
    { &_C_ctype_locale, NULL },         /* LC_CTYPE */
    { &_C_monetary_locale, NULL },      /* LC_MONETARY */
    { &_C_numeric_locale, NULL },       /* LC_NUMERIC */
    { &_C_time_locale, NULL },          /* LC_TIME */
    { &_C_messages_locale, NULL },      /* LC_MESSAGES */
  },
#endif /* __HAVE_LOCALE_INFO__ */
};
#endif /* _MB_CAPABLE */
241
/*
 * The mutable, process-wide locale.  It starts out like the "C" locale
 * with two differences visible below: the LC_CTYPE name is
 * DEFAULT_LOCALE rather than "C", and Cygwin starts with the UTF-8
 * conversion functions instead of the ASCII ones.
 *
 * NOTE(review): struct __locale_t is declared outside this file, so the
 * initializer is purely positional; check the struct declaration before
 * reordering anything.
 */
struct __locale_t __global_locale =
{
  /* Locale name of each category, LC_ALL first; the third slot is
     LC_CTYPE. */
  { "C", "C", DEFAULT_LOCALE, "C", "C", "C", "C", },
#ifdef __CYGWIN__
  __utf8_wctomb,
  __utf8_mbtowc,
#else
  __ascii_wctomb,
  __ascii_mbtowc,
#endif
  0,
  DEFAULT_CTYPE_PTR,
  {
    ".", "", "", "", "", "", "", "", "", "",
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX
  },
#ifndef __HAVE_LOCALE_INFO__
  "\1",
  "ASCII",
  "ASCII",
#else /* __HAVE_LOCALE_INFO__ */
  /* Per-category locale data; each entry is a pair whose second member
     is left NULL here. */
  {
    { NULL, NULL },                     /* LC_ALL */
#ifdef __CYGWIN__
    { &_C_collate_locale, NULL },       /* LC_COLLATE */
#else
    { NULL, NULL },                     /* LC_COLLATE */
#endif
    { &_C_ctype_locale, NULL },         /* LC_CTYPE */
    { &_C_monetary_locale, NULL },      /* LC_MONETARY */
    { &_C_numeric_locale, NULL },       /* LC_NUMERIC */
    { &_C_time_locale, NULL },          /* LC_TIME */
    { &_C_messages_locale, NULL },      /* LC_MESSAGES */
  },
#endif /* __HAVE_LOCALE_INFO__ */
};
281
#ifdef _MB_CAPABLE
/* Renamed from current_locale_string to make clear this is only the
   *global* string for setlocale (LC_ALL, NULL).  There's no equivalent
   functionality for uselocale.

   The buffer is filled by currentlocale with either a single locale name
   or all category names joined by "/", hence room for one name plus one
   separator per category. */
static char global_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)];
/* Builds global_locale_string from the global locale; defined after
   _setlocale_r, which calls it. */
static char *currentlocale (void);

#endif /* _MB_CAPABLE */
290
#ifdef _MB_CAPABLE
/* Copy the locale name SRC into DST, which must provide room for
   ENCODING_LEN + 1 bytes.  Returns 0 on success.  If SRC is longer than
   ENCODING_LEN nothing is copied, the errno of P is set to EINVAL and -1
   is returned. */
static int
store_locale_name (struct _reent *p, char *dst, const char *src)
{
  if (strlen (src) > ENCODING_LEN)
    {
      p->_errno = EINVAL;
      return -1;
    }
  strcpy (dst, src);
  return 0;
}
#endif /* _MB_CAPABLE */

/* Reentrant worker behind setlocale.

   P         reentrancy structure; only its errno is written.
   CATEGORY  one of the LC_* categories.
   LOCALE    NULL to query, "" to take the names from the environment,
             a single locale name, or (for LC_ALL only) a composite
             "name/name/..." string as produced by a previous query.

   Returns the name now in effect for CATEGORY (for LC_ALL the string
   built by currentlocale), or NULL on failure.  Without _MB_CAPABLE only
   "C", "POSIX" and "" are accepted and the result is always "C".  */
char *
_setlocale_r (struct _reent *p,
       int category,
       const char *locale)
{
#ifndef _MB_CAPABLE
  if (locale != NULL
      && strcmp (locale, "POSIX") != 0
      && strcmp (locale, "C") != 0
      && strcmp (locale, "") != 0)
    return NULL;
  return "C";
#else /* _MB_CAPABLE */
  static char new_categories[_LC_LAST][ENCODING_LEN + 1];
  static char saved_categories[_LC_LAST][ENCODING_LEN + 1];
  int cat, undo, len, saverr;
  const char *name, *sep;

  if (category < LC_ALL || category >= _LC_LAST)
    {
      p->_errno = EINVAL;
      return NULL;
    }

  /* Pure query: report the current setting without changing anything. */
  if (locale == NULL)
    {
      if (category == LC_ALL)
        return currentlocale ();
      return __get_global_locale ()->categories[category];
    }

  /* Start out from the names currently in effect ... */
  for (cat = 1; cat < _LC_LAST; ++cat)
    strcpy (new_categories[cat], __get_global_locale ()->categories[cat]);

  /* ... and overwrite the requested entries from the LOCALE argument. */
  if (*locale == '\0')
    {
      /* Empty string: consult the environment. */
      if (category != LC_ALL)
        {
          if (store_locale_name (p, new_categories[category],
                                 __get_locale_env (p, category)) != 0)
            return NULL;
        }
      else
        {
          for (cat = 1; cat < _LC_LAST; ++cat)
            if (store_locale_name (p, new_categories[cat],
                                   __get_locale_env (p, cat)) != 0)
              return NULL;
        }
    }
  else if (category != LC_ALL)
    {
      /* One category, one name. */
      if (store_locale_name (p, new_categories[category], locale) != 0)
        return NULL;
    }
  else if ((sep = strchr (locale, '/')) == NULL)
    {
      /* LC_ALL with a plain name: it applies to every category. */
      for (cat = 1; cat < _LC_LAST; ++cat)
        if (store_locale_name (p, new_categories[cat], locale) != 0)
          return NULL;
    }
  else
    {
      /* LC_ALL with a composite "name/name/..." string. */
      name = locale;
      cat = 1;
      while (sep[1] == '/')
        ++sep;
      if (sep[1] == '\0')
        {
          p->_errno = EINVAL;
          return NULL;  /* Hmm, just slashes... */
        }
      do
        {
          if (cat == _LC_LAST)
            break;  /* Too many slashes... */
          len = sep - name;
          if (len > ENCODING_LEN)
            {
              p->_errno = EINVAL;
              return NULL;
            }
          strlcpy (new_categories[cat], name, len + 1);
          ++cat;
          /* Step over the separator(s) to the start of the next name,
             then find where that name ends. */
          while (*sep == '/')
            ++sep;
          name = sep;
          while (*sep != '\0' && *sep != '/')
            ++sep;
        }
      while (*name != '\0');
      /* Fewer names than categories: the last one given is repeated. */
      for (; cat < _LC_LAST; ++cat)
        strcpy (new_categories[cat], new_categories[cat - 1]);
    }

  if (category != LC_ALL)
    return __loadlocale (__get_global_locale (), category,
                         new_categories[category]);

  for (cat = 1; cat < _LC_LAST; ++cat)
    {
      strcpy (saved_categories[cat], __get_global_locale ()->categories[cat]);
      if (__loadlocale (__get_global_locale (), cat, new_categories[cat])
          != NULL)
        continue;

      /* Loading failed: put back every category already switched, falling
         back to "C" if even the old name cannot be restored, and report
         the errno of the original failure. */
      saverr = p->_errno;
      for (undo = 1; undo < cat; undo++)
        {
          strcpy (new_categories[undo], saved_categories[undo]);
          if (__loadlocale (__get_global_locale (), undo,
                            new_categories[undo]) == NULL)
            {
              strcpy (new_categories[undo], "C");
              __loadlocale (__get_global_locale (), undo,
                            new_categories[undo]);
            }
        }
      p->_errno = saverr;
      return NULL;
    }
  return currentlocale ();
#endif /* _MB_CAPABLE */
}
438
439#ifdef _MB_CAPABLE
440static char *
441currentlocale ()
442{
443  int i;
444
445  strcpy (global_locale_string, __get_global_locale ()->categories[1]);
446
447  for (i = 2; i < _LC_LAST; ++i)
448    if (strcmp (__get_global_locale ()->categories[1],
449                __get_global_locale ()->categories[i]))
450      {
451        for (i = 2; i < _LC_LAST; ++i)
452          {
453            (void)strcat(global_locale_string, "/");
454            (void)strcat(global_locale_string,
455                         __get_global_locale ()->categories[i]);
456          }
457        break;
458      }
459  return global_locale_string;
460}
461
/* Defined outside this file.  __loadlocale calls it with the canonical
   charset name whenever the LC_CTYPE category of a locale is changed. */
extern void __set_ctype (struct __locale_t *, const char *charset);
463
464char *
465__loadlocale (struct __locale_t *loc, int category, const char *new_locale)
466{
467  /* At this point a full-featured system would just load the locale
468     specific data from the locale files.
469     What we do here for now is to check the incoming string for correctness.
470     The string must be in one of the allowed locale strings, either
471     one in POSIX-style, or one in the old newlib style to maintain
472     backward compatibility.  If the local string is correct, the charset
473     is extracted and stored in ctype_codeset or message_charset
474     dependent on the cateogry. */
475  char *locale = NULL;
476  char charset[ENCODING_LEN + 1];
477  long val = 0;
478  char *end, *c = NULL;
479  int mbc_max;
480  wctomb_p l_wctomb;
481  mbtowc_p l_mbtowc;
482  int cjknarrow = 0;
483
484  /* Avoid doing everything twice if nothing has changed.
485
486     duplocale relies on this test to go wrong so the locale is actually
487     duplicated when required.  Any change here has to be synced with a
488     matching change in duplocale. */
489  if (!strcmp (new_locale, loc->categories[category]))
490    return loc->categories[category];
491
492#ifdef __CYGWIN__
493  /* This additional code handles the case that the incoming locale string
494     is not valid.  If so, it calls the function __set_locale_from_locale_alias,
495     which is only available on Cygwin right now.  The function reads the
496     file /usr/share/locale/locale.alias.  The file contains locale aliases
497     and their replacement locale.  For instance, the alias "french" is
498     translated to "fr_FR.ISO-8859-1", the alias "thai" is translated to
499     "th_TH.TIS-620".  If successful, the function returns with a pointer
500     to the second argument, which is a buffer in which the replacement locale
501     gets stored.  Otherwise the function returns NULL. */
502  char tmp_locale[ENCODING_LEN + 1];
503  int ret = 0;
504
505restart:
506  if (!locale)
507    locale = (char *) new_locale;
508  else if (locale != tmp_locale)
509    {
510      locale = __set_locale_from_locale_alias (locale, tmp_locale);
511      if (!locale)
512        return NULL;
513    }
514# define FAIL   goto restart
515#else
516  locale = new_locale;
517# define FAIL   return NULL
518#endif
519
520  /* "POSIX" is translated to "C", as on Linux. */
521  if (!strcmp (locale, "POSIX"))
522    strcpy (locale, "C");
523  if (!strcmp (locale, "C"))                            /* Default "C" locale */
524    strcpy (charset, "ASCII");
525  else if (locale[0] == 'C'
526           && (locale[1] == '-'         /* Old newlib style */
527               || locale[1] == '.'))    /* Extension for the C locale to allow
528                                           specifying different charsets while
529                                           sticking to the C locale in terms
530                                           of sort order, etc.  Proposed in
531                                           the Debian project. */
532    {
533      char *chp;
534
535      c = locale + 2;
536      strcpy (charset, c);
537      if ((chp = strchr (charset, '@')))
538        /* Strip off modifier */
539        *chp = '\0';
540      c += strlen (charset);
541    }
542  else                                                  /* POSIX style */
543    {
544      c = locale;
545
546      /* Don't use ctype macros here, they might be localized. */
547      /* Language */
548      if (c[0] < 'a' || c[0] > 'z'
549          || c[1] < 'a' || c[1] > 'z')
550        FAIL;
551      c += 2;
552      /* Allow three character Language per ISO 639-3 */
553      if (c[0] >= 'a' && c[0] <= 'z')
554        ++c;
555      if (c[0] == '_')
556        {
557          /* Territory */
558          ++c;
559          if (c[0] < 'A' || c[0] > 'Z'
560              || c[1] < 'A' || c[1] > 'Z')
561            FAIL;
562          c += 2;
563        }
564      if (c[0] == '.')
565        {
566          /* Charset */
567          char *chp;
568
569          ++c;
570          strcpy (charset, c);
571          if ((chp = strchr (charset, '@')))
572            /* Strip off modifier */
573            *chp = '\0';
574          c += strlen (charset);
575        }
576      else if (c[0] == '\0' || c[0] == '@')
577        /* End of string or just a modifier */
578#ifdef __CYGWIN__
579        /* The Cygwin-only function __set_charset_from_locale checks
580           for the default charset which is connected to the given locale.
581           The function uses Windows functions in turn so it can't be easily
582           adapted to other targets.  However, if any other target provides
583           equivalent functionality, preferrably using the same function name
584           it would be sufficient to change the guarding #ifdef. */
585        __set_charset_from_locale (locale, charset);
586#else
587        strcpy (charset, "ISO-8859-1");
588#endif
589      else
590        /* Invalid string */
591        FAIL;
592    }
593  if (c && c[0] == '@')
594    {
595      /* Modifier */
596      /* Only one modifier is recognized right now.  "cjknarrow" is used
597         to modify the behaviour of wcwidth() for East Asian languages.
598         For details see the comment at the end of this function. */
599      if (!strcmp (c + 1, "cjknarrow"))
600        cjknarrow = 1;
601    }
602  /* We only support this subset of charsets. */
603  switch (charset[0])
604    {
605    case 'U':
606    case 'u':
607      if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8"))
608        FAIL;
609      strcpy (charset, "UTF-8");
610      mbc_max = 6;
611      l_wctomb = __utf8_wctomb;
612      l_mbtowc = __utf8_mbtowc;
613    break;
614#ifndef __CYGWIN__
615    /* Cygwin does not support JIS at all. */
616    case 'J':
617    case 'j':
618      if (strcasecmp (charset, "JIS"))
619        FAIL;
620      strcpy (charset, "JIS");
621      mbc_max = 8;
622      l_wctomb = __jis_wctomb;
623      l_mbtowc = __jis_mbtowc;
624    break;
625#endif /* !__CYGWIN__ */
626    case 'E':
627    case 'e':
628      if (strncasecmp (charset, "EUC", 3))
629        FAIL;
630      c = charset + 3;
631      if (*c == '-')
632        ++c;
633      if (!strcasecmp (c, "JP"))
634        {
635          strcpy (charset, "EUCJP");
636          mbc_max = 3;
637          l_wctomb = __eucjp_wctomb;
638          l_mbtowc = __eucjp_mbtowc;
639        }
640#ifdef __CYGWIN__
641      /* Newlib does neither provide EUC-KR nor EUC-CN, and Cygwin's
642         implementation requires Windows support. */
643      else if (!strcasecmp (c, "KR"))
644        {
645          strcpy (charset, "EUCKR");
646          mbc_max = 2;
647          l_wctomb = __kr_wctomb;
648          l_mbtowc = __kr_mbtowc;
649        }
650      else if (!strcasecmp (c, "CN"))
651        {
652          strcpy (charset, "EUCCN");
653          mbc_max = 2;
654          l_wctomb = __gbk_wctomb;
655          l_mbtowc = __gbk_mbtowc;
656        }
657#endif /* __CYGWIN__ */
658      else
659        FAIL;
660    break;
661    case 'S':
662    case 's':
663      if (strcasecmp (charset, "SJIS"))
664        FAIL;
665      strcpy (charset, "SJIS");
666      mbc_max = 2;
667      l_wctomb = __sjis_wctomb;
668      l_mbtowc = __sjis_mbtowc;
669    break;
670    case 'I':
671    case 'i':
672      /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
673         ISO-8859-12.  This code also recognizes the aliases without dashes. */
674      if (strncasecmp (charset, "ISO", 3))
675        FAIL;
676      c = charset + 3;
677      if (*c == '-')
678        ++c;
679      if (strncasecmp (c, "8859", 4))
680        FAIL;
681      c += 4;
682      if (*c == '-')
683        ++c;
684      val = strtol (c, &end, 10);
685      if (val < 1 || val > 16 || val == 12 || *end)
686        FAIL;
687      strcpy (charset, "ISO-8859-");
688      c = charset + 9;
689      if (val > 10)
690        *c++ = '1';
691      *c++ = val % 10 + '0';
692      *c = '\0';
693      mbc_max = 1;
694#ifdef _MB_EXTENDED_CHARSETS_ISO
695      l_wctomb = __iso_wctomb (val);
696      l_mbtowc = __iso_mbtowc (val);
697#else /* !_MB_EXTENDED_CHARSETS_ISO */
698      l_wctomb = __ascii_wctomb;
699      l_mbtowc = __ascii_mbtowc;
700#endif /* _MB_EXTENDED_CHARSETS_ISO */
701    break;
702    case 'C':
703    case 'c':
704      if (charset[1] != 'P' && charset[1] != 'p')
705        FAIL;
706      strncpy (charset, "CP", 2);
707      val = strtol (charset + 2, &end, 10);
708      if (*end)
709        FAIL;
710      switch (val)
711        {
712        case 437:
713        case 720:
714        case 737:
715        case 775:
716        case 850:
717        case 852:
718        case 855:
719        case 857:
720        case 858:
721        case 862:
722        case 866:
723        case 874:
724        case 1125:
725        case 1250:
726        case 1251:
727        case 1252:
728        case 1253:
729        case 1254:
730        case 1255:
731        case 1256:
732        case 1257:
733        case 1258:
734          mbc_max = 1;
735#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
736          l_wctomb = __cp_wctomb (val);
737          l_mbtowc = __cp_mbtowc (val);
738#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
739          l_wctomb = __ascii_wctomb;
740          l_mbtowc = __ascii_mbtowc;
741#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
742          break;
743        case 932:
744          mbc_max = 2;
745          l_wctomb = __sjis_wctomb;
746          l_mbtowc = __sjis_mbtowc;
747          break;
748        default:
749          FAIL;
750        }
751    break;
752    case 'K':
753    case 'k':
754      /* KOI8-R, KOI8-U and the aliases without dash */
755      if (strncasecmp (charset, "KOI8", 4))
756        FAIL;
757      c = charset + 4;
758      if (*c == '-')
759        ++c;
760      if (*c == 'R' || *c == 'r')
761        {
762          val = 20866;
763          strcpy (charset, "CP20866");
764        }
765      else if (*c == 'U' || *c == 'u')
766        {
767          val = 21866;
768          strcpy (charset, "CP21866");
769        }
770      else
771        FAIL;
772      mbc_max = 1;
773#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
774      l_wctomb = __cp_wctomb (val);
775      l_mbtowc = __cp_mbtowc (val);
776#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
777      l_wctomb = __ascii_wctomb;
778      l_mbtowc = __ascii_mbtowc;
779#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
780      break;
781    case 'A':
782    case 'a':
783      if (strcasecmp (charset, "ASCII"))
784        FAIL;
785      strcpy (charset, "ASCII");
786      mbc_max = 1;
787      l_wctomb = __ascii_wctomb;
788      l_mbtowc = __ascii_mbtowc;
789      break;
790    case 'G':
791    case 'g':
792#ifdef __CYGWIN__
793      /* Newlib does not provide GBK/GB2312 and Cygwin's implementation
794         requires Windows support. */
795      if (!strcasecmp (charset, "GBK")
796          || !strcasecmp (charset, "GB2312"))
797        {
798          strcpy (charset, charset[2] == '2' ? "GB2312" : "GBK");
799          mbc_max = 2;
800          l_wctomb = __gbk_wctomb;
801          l_mbtowc = __gbk_mbtowc;
802        }
803      else
804#endif /* __CYGWIN__ */
805      /* GEORGIAN-PS and the alias without dash */
806      if (!strncasecmp (charset, "GEORGIAN", 8))
807        {
808          c = charset + 8;
809          if (*c == '-')
810            ++c;
811          if (strcasecmp (c, "PS"))
812            FAIL;
813          val = 101;
814          strcpy (charset, "CP101");
815          mbc_max = 1;
816#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
817          l_wctomb = __cp_wctomb (val);
818          l_mbtowc = __cp_mbtowc (val);
819#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
820          l_wctomb = __ascii_wctomb;
821          l_mbtowc = __ascii_mbtowc;
822#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
823        }
824      else
825        FAIL;
826      break;
827    case 'P':
828    case 'p':
829      /* PT154 */
830      if (strcasecmp (charset, "PT154"))
831        FAIL;
832      val = 102;
833      strcpy (charset, "CP102");
834      mbc_max = 1;
835#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
836      l_wctomb = __cp_wctomb (val);
837      l_mbtowc = __cp_mbtowc (val);
838#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
839      l_wctomb = __ascii_wctomb;
840      l_mbtowc = __ascii_mbtowc;
841#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
842      break;
843    case 'T':
844    case 't':
845      if (strncasecmp (charset, "TIS", 3))
846        FAIL;
847      c = charset + 3;
848      if (*c == '-')
849        ++c;
850      if (strcasecmp (c, "620"))
851        FAIL;
852      val = 874;
853      strcpy (charset, "CP874");
854      mbc_max = 1;
855#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
856      l_wctomb = __cp_wctomb (val);
857      l_mbtowc = __cp_mbtowc (val);
858#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
859      l_wctomb = __ascii_wctomb;
860      l_mbtowc = __ascii_mbtowc;
861#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
862      break;
863#ifdef __CYGWIN__
864    /* Newlib does not provide Big5 and Cygwin's implementation
865       requires Windows support. */
866    case 'B':
867    case 'b':
868      if (strcasecmp (charset, "BIG5"))
869        FAIL;
870      strcpy (charset, "BIG5");
871      mbc_max = 2;
872      l_wctomb = __big5_wctomb;
873      l_mbtowc = __big5_mbtowc;
874      break;
875#endif /* __CYGWIN__ */
876    default:
877      FAIL;
878    }
879  switch (category)
880    {
881    case LC_CTYPE:
882#ifndef __HAVE_LOCALE_INFO__
883      strcpy (loc->ctype_codeset, charset);
884      loc->mb_cur_max[0] = mbc_max;
885#endif
886#ifdef __CYGWIN__
887      __mb_cur_max = mbc_max;   /* Only for backward compat */
888#endif
889      loc->wctomb = l_wctomb;
890      loc->mbtowc = l_mbtowc;
891      __set_ctype (loc, charset);
892      /* Determine the width for the "CJK Ambiguous Width" category of
893         characters. This is used in wcwidth(). Assume single width for
894         single-byte charsets, and double width for multi-byte charsets
895         other than UTF-8. For UTF-8, use double width for the East Asian
896         languages ("ja", "ko", "zh"), and single width for everything else.
897         Single width can also be forced with the "@cjknarrow" modifier. */
898      loc->cjk_lang = !cjknarrow && mbc_max > 1
899                      && (charset[0] != 'U'
900                          || strncmp (locale, "ja", 2) == 0
901                          || strncmp (locale, "ko", 2) == 0
902                          || strncmp (locale, "zh", 2) == 0);
903#ifdef __HAVE_LOCALE_INFO__
904      ret = __ctype_load_locale (loc, locale, (void *) l_wctomb, charset,
905                                 mbc_max);
906#endif /* __HAVE_LOCALE_INFO__ */
907      break;
908    case LC_MESSAGES:
909#ifdef __HAVE_LOCALE_INFO__
910      ret = __messages_load_locale (loc, locale, (void *) l_wctomb, charset);
911      if (!ret)
912#else
913      strcpy (loc->message_codeset, charset);
914#endif /* __HAVE_LOCALE_INFO__ */
915      break;
916#ifdef __HAVE_LOCALE_INFO__
917#ifdef __CYGWIN__
918  /* Right now only Cygwin supports a __collate_load_locale function at all. */
919    case LC_COLLATE:
920      ret = __collate_load_locale (loc, locale, (void *) l_mbtowc, charset);
921      break;
922#endif
923    case LC_MONETARY:
924      ret = __monetary_load_locale (loc, locale, (void *) l_wctomb, charset);
925      break;
926    case LC_NUMERIC:
927      ret = __numeric_load_locale (loc, locale, (void *) l_wctomb, charset);
928      break;
929    case LC_TIME:
930      ret = __time_load_locale (loc, locale, (void *) l_wctomb, charset);
931      break;
932#endif /* __HAVE_LOCALE_INFO__ */
933    default:
934      break;
935    }
936#ifdef __HAVE_LOCALE_INFO__
937  if (ret)
938    FAIL;
939#endif /* __HAVE_LOCALE_INFO__ */
940  return strcpy(loc->categories[category], new_locale);
941}
942
943const char *
944__get_locale_env (struct _reent *p, int category)
945{
946  const char *env;
947
948  /* 1. check LC_ALL. */
949  env = _getenv_r (p, categories[0]);
950
951  /* 2. check LC_* */
952  if (env == NULL || !*env)
953    env = _getenv_r (p, categories[category]);
954
955  /* 3. check LANG */
956  if (env == NULL || !*env)
957    env = _getenv_r (p, "LANG");
958
959  /* 4. if none is set, fall to default locale */
960  if (env == NULL || !*env)
961    env = __default_locale;
962
963  return env;
964}
965#endif /* _MB_CAPABLE */
966
967int
968__locale_mb_cur_max (void)
969{
970#ifdef __HAVE_LOCALE_INFO__
971  return __get_current_ctype_locale ()->mb_cur_max[0];
972#else
973  return __get_current_locale ()->mb_cur_max[0];
974#endif
975}
976
977const char *
978__locale_ctype_ptr_l (struct __locale_t *locale)
979{
980  return locale->ctype_ptr;
981}
982
983const char *
984__locale_ctype_ptr (void)
985{
986  return __get_current_locale ()->ctype_ptr;
987}
988
989#ifndef _REENT_ONLY
990
991char *
992setlocale (int category,
993        const char *locale)
994{
995  return _setlocale_r (_REENT, category, locale);
996}
997
998#endif
Note: See TracBrowser for help on using the repository browser.