From: Paul Eggert Date: Mon, 11 Feb 2002 14:28:09 +0000 (+0000) Subject: Sync with fileutils X-Git-Url: https://git.brokenzipper.com/gitweb?a=commitdiff_plain;h=9ded0a0567b58bfbdf10a8dc98e0158e486466ac;p=chaz%2Ftar Sync with fileutils --- diff --git a/lib/unicodeio.c b/lib/unicodeio.c index a1db6e2..049b9af 100644 --- a/lib/unicodeio.c +++ b/lib/unicodeio.c @@ -1,23 +1,27 @@ /* Unicode character output to streams with locale dependent encoding. - Copyright (C) 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 2000-2002 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + USA. */ /* Written by Bruno Haible . */ +/* Note: This file requires the locale_charset() function. See in + libiconv-1.7/libcharset/INTEGRATE for how to obtain it. */ + #ifdef HAVE_CONFIG_H # include #endif @@ -42,20 +46,17 @@ extern int errno; # include #endif -/* Some systems, like SunOS 4, don't have EILSEQ. On these systems, - define EILSEQ to some value other than EINVAL, because our invokers - may want to distinguish EINVAL from EILSEQ. */ -#ifndef EILSEQ -# define EILSEQ ENOENT -#endif -#ifndef ENOTSUP -# define ENOTSUP EINVAL -#endif +#include -#if HAVE_LANGINFO_CODESET && ! USE_INCLUDED_LIBINTL -# include +#if ENABLE_NLS +# include +#else +# define gettext(Text) Text #endif +#define _(Text) gettext (Text) +#define N_(Text) Text +/* Specification. */ #include "unicodeio.h" /* When we pass a Unicode character to iconv(), we must pass it in a @@ -110,17 +111,17 @@ utf8_wctomb (unsigned char *r, unsigned int wc) #define UTF8_NAME "UTF-8" /* Converts the Unicode character CODE to its multibyte representation - in the current locale and calls SUCCESS on the resulting byte - sequence. If an error occurs, invoke FAILURE instead, - passing it CODE with errno set appropriately. - Assumes that the locale doesn't change between two calls. - Return whatever the SUCCESS or FAILURE returns. */ -int + in the current locale and calls the SUCCESS callback on the resulting + byte sequence. If an error occurs, invokes the FAILURE callback instead, + passing it CODE and an English error string. + Returns whatever the callback returned. + Assumes that the locale doesn't change between two calls. */ +long unicode_to_mb (unsigned int code, - int (*success) PARAMS((const char *buf, size_t buflen, - void *callback_arg)), - int (*failure) PARAMS((unsigned int code, - void *callback_arg)), + long (*success) PARAMS ((const char *buf, size_t buflen, + void *callback_arg)), + long (*failure) PARAMS ((unsigned int code, const char *msg, + void *callback_arg)), void *callback_arg) { static int initialized; @@ -134,18 +135,8 @@ unicode_to_mb (unsigned int code, if (!initialized) { - const char *charset; - -#if USE_INCLUDED_LIBINTL extern const char *locale_charset PARAMS ((void)); - charset = locale_charset (); -#else -# if HAVE_LANGINFO_CODESET - charset = nl_langinfo (CODESET); -# else - charset = ""; -# endif -#endif + const char *charset = locale_charset (); is_utf8 = !strcmp (charset, UTF8_NAME); #if HAVE_ICONV @@ -153,32 +144,32 @@ unicode_to_mb (unsigned int code, { utf8_to_local = iconv_open (charset, UTF8_NAME); if (utf8_to_local == (iconv_t)(-1)) - { - /* For an unknown encoding, assume ASCII. */ - utf8_to_local = iconv_open ("ASCII", UTF8_NAME); - if (utf8_to_local == (iconv_t)(-1)) - return failure (code, callback_arg); - } + /* For an unknown encoding, assume ASCII. */ + utf8_to_local = iconv_open ("ASCII", UTF8_NAME); } #endif initialized = 1; } + /* Test whether the utf8_to_local converter is available at all. */ + if (!is_utf8) + { +#if HAVE_ICONV + if (utf8_to_local == (iconv_t)(-1)) + return failure (code, N_("iconv function not usable"), callback_arg); +#else + return failure (code, N_("iconv function not available"), callback_arg); +#endif + } + /* Convert the character to UTF-8. */ count = utf8_wctomb ((unsigned char *) inbuf, code); if (count < 0) - { - errno = EILSEQ; - return failure (code, callback_arg); - } + return failure (code, N_("character out of range"), callback_arg); - if (is_utf8) - { - return success (inbuf, count, callback_arg); - } - else - { #if HAVE_ICONV + if (!is_utf8) + { char outbuf[25]; const char *inptr; size_t inbytesleft; @@ -201,11 +192,7 @@ unicode_to_mb (unsigned int code, || (res > 0 && code != 0 && outptr - outbuf == 1 && *outbuf == '\0') # endif ) - { - if (res != (size_t)(-1)) - errno = EILSEQ; - return failure (code, callback_arg); - } + return failure (code, NULL, callback_arg); /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ # if defined _LIBICONV_VERSION \ @@ -214,46 +201,63 @@ unicode_to_mb (unsigned int code, /* Get back to the initial shift state. */ res = iconv (utf8_to_local, NULL, NULL, &outptr, &outbytesleft); if (res == (size_t)(-1)) - return failure (code, callback_arg); + return failure (code, NULL, callback_arg); # endif return success (outbuf, outptr - outbuf, callback_arg); -#else - errno = ENOTSUP; - return failure (code, callback_arg); -#endif } +#endif + + /* At this point, is_utf8 is true, so no conversion is needed. */ + return success (inbuf, count, callback_arg); } /* Simple success callback that outputs the converted string. The STREAM is passed as callback_arg. */ -int -print_unicode_success (const char *buf, size_t buflen, void *callback_arg) +long +fwrite_success_callback (const char *buf, size_t buflen, void *callback_arg) { FILE *stream = (FILE *) callback_arg; - return fwrite (buf, 1, buflen, stream) == 0 ? -1 : 0; + fwrite (buf, 1, buflen, stream); + return 0; } -/* Simple failure callback that prints an ASCII representation, using - the same notation as C99 strings. */ -int -print_unicode_failure (unsigned int code, void *callback_arg) +/* Simple failure callback that displays an error and exits. */ +static long +exit_failure_callback (unsigned int code, const char *msg, void *callback_arg) { - int e = errno; - FILE *stream = callback_arg; - - fprintf (stream, code < 0x10000 ? "\\u%04X" : "\\U%08X", code); - errno = e; + if (msg == NULL) + error (1, 0, _("cannot convert U+%04X to local character set"), code); + else + error (1, 0, _("cannot convert U+%04X to local character set: %s"), code, + gettext (msg)); + return -1; +} + +/* Simple failure callback that displays a fallback representation in plain + ASCII, using the same notation as ISO C99 strings. */ +static long +fallback_failure_callback (unsigned int code, const char *msg, void *callback_arg) +{ + FILE *stream = (FILE *) callback_arg; + + if (code < 0x10000) + fprintf (stream, "\\u%04X", code); + else + fprintf (stream, "\\U%08X", code); return -1; } /* Outputs the Unicode character CODE to the output stream STREAM. - Returns zero if successful, -1 (setting errno) otherwise. - Assumes that the locale doesn't change between two calls. */ -int -print_unicode_char (FILE *stream, unsigned int code) + Upon failure, exit if exit_on_error is true, otherwise output a fallback + notation. */ +void +print_unicode_char (FILE *stream, unsigned int code, int exit_on_error) { - return unicode_to_mb (code, print_unicode_success, print_unicode_failure, - stream); + unicode_to_mb (code, fwrite_success_callback, + exit_on_error + ? exit_failure_callback + : fallback_failure_callback, + stream); }