/* * e-unicode.c - utf-8 support functions for gal * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) version 3. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with the program; if not, see * * * Authors: * Lauris Kaplinski * * Copyright (C) 1999-2008 Novell, Inc. (www.novell.com) * */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include #include #include #include #include #include #include "e-unicode.h" #define d(x) #define FONT_TESTING #define MAX_DECOMP 8 /* FIXME: this has not been ported fully yet - non ASCII people beware. */ gchar * e_utf8_from_gtk_event_key (GtkWidget *widget, guint keyval, const gchar *string) { gint unival; gchar *utf; gint unilen; if (keyval == GDK_KEY_VoidSymbol) { utf = e_utf8_from_locale_string_sized (string, strlen (string)); } else { unival = gdk_keyval_to_unicode (keyval); if (unival < ' ') return NULL; utf = g_new (gchar, 7); unilen = e_unichar_to_utf8 (unival, utf); utf[unilen] = '\0'; } return utf; } gchar * e_utf8_from_iconv_string_sized (iconv_t ic, const gchar *string, gint bytes) { gchar *new, *ob; const gchar *ib; gsize ibl, obl; if (!string) return NULL; if (ic == (iconv_t) -1) { gint i; /* iso-8859-1 */ ib = (gchar *) string; new = ob = (gchar *) g_new (guchar, bytes * 2 + 1); for (i = 0; i < (bytes); i++) { ob += e_unichar_to_utf8 (ib[i], ob); } *ob = '\0'; return new; } ib = string; ibl = bytes; new = ob = g_new (gchar, ibl * 6 + 1); obl = ibl * 6; while (ibl > 0) { camel_iconv (ic, &ib, &ibl, &ob, &obl); if (ibl > 0) { gint len; if ((*ib & 0x80) == 0x00) len = 1; else if ((*ib &0xe0) == 0xc0) len = 2; else if ((*ib &0xf0) == 0xe0) len = 3; else if ((*ib &0xf8) == 0xf0) len = 4; else { g_warning ("Invalid UTF-8 sequence"); break; } ib += len; ibl = bytes - (ib - string); if (ibl > bytes) ibl = 0; *ob++ = '_'; obl--; } } *ob = '\0'; return new; } gchar * e_utf8_to_iconv_string_sized (iconv_t ic, const gchar *string, gint bytes) { gchar *new, *ob; const gchar *ib; gsize ibl, obl; if (!string) return NULL; if (ic == (iconv_t) -1) { gint len; const gchar *u; gunichar uc; new = (gchar *) g_new (guchar, bytes * 4 + 1); u = string; len = 0; while ((u) && (u - string < bytes)) { u = e_unicode_get_utf8 (u, &uc); new[len++] = uc & 0xff; } new[len] = '\0'; return new; } ib = string; ibl = bytes; new = ob = g_new (char, ibl * 4 + 4); obl = ibl * 4; while (ibl > 0) { camel_iconv (ic, &ib, &ibl, &ob, &obl); if (ibl > 0) { gint len; if ((*ib & 0x80) == 0x00) len = 1; else if ((*ib &0xe0) == 0xc0) len = 2; else if ((*ib &0xf0) == 0xe0) len = 3; else if ((*ib &0xf8) == 0xf0) len = 4; else { g_warning ("Invalid UTF-8 sequence"); break; } ib += len; ibl = bytes - (ib - string); if (ibl > bytes) ibl = 0; /* FIXME This is wrong. What if the destination * charset is 16 or 32 bit? */ *ob++ = '_'; obl--; } } /* Make sure to terminate with plenty of padding */ memset (ob, 0, 4); return new; } gchar * e_utf8_to_charset_string_sized (const gchar *charset, const gchar *string, gint bytes) { iconv_t ic; gchar *ret; if (!string) return NULL; ic = camel_iconv_open (charset, "utf-8"); ret = e_utf8_to_iconv_string_sized (ic, string, bytes); camel_iconv_close (ic); return ret; } gchar * e_utf8_from_locale_string_sized (const gchar *string, gint bytes) { iconv_t ic; gchar *ret; if (!string) return NULL; ic = camel_iconv_open ("utf-8", camel_iconv_locale_charset ()); ret = e_utf8_from_iconv_string_sized (ic, string, bytes); camel_iconv_close (ic); return ret; } /** * e_utf8_ensure_valid: * @string: string to make valid UTF-8 * * Ensures the returned string will be valid UTF-8 string, thus GTK+ * functions expecting only valid UTF-8 text will not crash. * * Returned pointer should be freed with g_free(). * * Returns: a newly-allocated UTF-8 string **/ gchar * e_utf8_ensure_valid (const gchar *string) { gchar *res = g_strdup (string), *p; if (!res) return res; p = res; while (!g_utf8_validate (p, -1, (const gchar **) &p)) { /* make all invalid characters appear as question marks */ *p = '?'; } return res; } /** * e_unichar_to_utf8: * @c: a ISO10646 character code * @outbuf: output buffer, must have at least 6 bytes of space. * If %NULL, the length will be computed and returned * and nothing will be written to @out. * * Convert a single character to utf8 * * Return value: number of bytes written **/ gint e_unichar_to_utf8 (gint c, gchar *outbuf) { gsize len = 0; gint first; gint i; if (c < 0x80) { first = 0; len = 1; } else if (c < 0x800) { first = 0xc0; len = 2; } else if (c < 0x10000) { first = 0xe0; len = 3; } else if (c < 0x200000) { first = 0xf0; len = 4; } else if (c < 0x4000000) { first = 0xf8; len = 5; } else { first = 0xfc; len = 6; } if (outbuf) { for (i = len - 1; i > 0; --i) { outbuf[i] = (c & 0x3f) | 0x80; c >>= 6; } outbuf[0] = c | first; } return len; } gchar * e_unicode_get_utf8 (const gchar *text, gunichar *out) { *out = g_utf8_get_char (text); return (*out == (gunichar) - 1) ? NULL : g_utf8_next_char (text); } gchar * e_xml_get_translated_utf8_string_prop_by_name (const xmlNode *parent, const xmlChar *prop_name) { xmlChar *prop; gchar *ret_val = NULL; gchar *combined_name; g_return_val_if_fail (parent != NULL, NULL); g_return_val_if_fail (prop_name != NULL, NULL); prop = xmlGetProp ((xmlNode *) parent, prop_name); if (prop != NULL) { ret_val = g_strdup ((gchar *) prop); xmlFree (prop); return ret_val; } combined_name = g_strdup_printf ("_%s", prop_name); prop = xmlGetProp ((xmlNode *) parent, (guchar *) combined_name); if (prop != NULL) { ret_val = g_strdup (gettext ((gchar *) prop)); xmlFree (prop); } g_free (combined_name); return ret_val; }