/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8; -*- */

/* 
 * Authors:
 *   Michael Zucchi <notzed@ximian.com>
 *   Dan Winship <danw@ximian.com>
 *
 * Copyright 2000, 2001 Ximian, Inc. (www.ximian.com)
 *
 * This program is free software; you can redistribute it and/or 
 * modify it under the terms of the GNU General Public License as 
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
  if you want to build the charset map, compile this with something like:
    gcc -DBUILD_MAP camel-charset-map.c `glib-config --cflags`
  (plus any -I/-L/-l flags you need for iconv), then run it as 
    ./a.out > camel-charset-map-private.h

  Note that the big-endian variant isn't tested...

  The generated tables work like this:

   An indirect array for each page of unicode character
   Each array element has an indirect pointer to one of the bytes of
   the generated bitmask.
*/

#ifdef BUILD_MAP
#include <iconv.h>
#include <glib.h>

/* Charsets probed by the generator.  Each entry starts with bit == 0;
 * main() overwrites it with the mask bit it assigns to that charset.
 * The list ends at the entry whose name is 0.
 */
static struct {
	char *name;
	unsigned int bit;	/* assigned bit */
} tables[] = {
	/* These are the 8bit character sets (other than iso-8859-1,
	 * which is special-cased) which are supported by both other
	 * mailers and the GNOME environment. Note that the order
	 * they're listed in is the order they'll be tried in, so put
	 * the more-popular ones first.
	 */
	{ "iso-8859-2", 0 },	/* Central/Eastern European */
	{ "iso-8859-4", 0 },	/* Baltic */
	{ "koi8-r", 0 },	/* Russian */
	{ "windows-1251", 0 },	/* Russian */
	{ "koi8-u", 0 },	/* Ukrainian */
	{ "iso-8859-5", 0 },	/* Least-popular Russian encoding */
	{ "iso-8859-7", 0 },	/* Greek */
	{ "iso-8859-9", 0 },	/* Turkish */
	{ "iso-8859-13", 0 },	/* Baltic again */
	{ "iso-8859-15", 0 },	/* New-and-improved iso-8859-1, but most
				 * programs that support this support UTF8
				 */
	{ 0, 0 }		/* terminator */
};

/* One mask word per 16-bit code point (256 pages of 256 characters);
 * main() ORs in the bit of every charset that can represent that
 * code point.
 */
unsigned int encoding_map[256 * 256];

/* iconv name of UCS-4 in the byte order of the build host, so that
 * main() can read the converted output directly as guint32 values.
 */
#if G_BYTE_ORDER == G_BIG_ENDIAN
#define UCS "UCS-4BE"
#else
#define UCS "UCS-4LE"
#endif

/* Returns non-zero if any character of unicode page `page` has a bit
 * set in byte `k` of its encoding_map mask, i.e. if the m<page><k>
 * table has to be emitted/referenced.
 */
static int
page_uses_byte (int page, int k)
{
	int j;

	for (j = 0; j < 256; j++) {
		if ((encoding_map[page * 256 + j] & (0xffU << (k * 8))) != 0)
			return 1;
	}

	return 0;
}

/* Generator entry point (BUILD_MAP only).
 *
 * For every charset in tables[] the bytes 0x80..0xff are converted to
 * UCS-4 with iconv; each resulting code point gets that charset's bit
 * set in encoding_map[].  The result is written to stdout as C source
 * (per-page byte tables, camel_charmap[], camel_charinfo[] and the
 * charset_mask() macro), meant to be redirected into
 * camel-charset-map-private.h.  Diagnostics therefore go to stderr so
 * they can never end up inside the generated header.
 *
 * Returns 0 on success; exits with status 1 on any iconv failure.
 */
int main(void)
{
	int i, j;
	unsigned int bit = 0x01;
	int k;
	int bytes;
	iconv_t cd;
	char in[128];
	guint32 out[128];
	char *inptr, *outptr;
	size_t inlen, outlen;
	size_t nconv;

	/* number of mask bytes needed for all charsets; the "-1" drops
	 * the terminator entry from the count */
	bytes = ((sizeof(tables)/sizeof(tables[0]))+7-1)/8;

	/* the upper half of an 8bit charset */
	for (i = 0; i < 128; i++)
		in[i] = i + 128;

	for (j = 0; tables[j].name; j++) {
		cd = iconv_open (UCS, tables[j].name);
		if (cd == (iconv_t) -1) {
			fprintf (stderr, "iconv_open (%s): %s\n", tables[j].name, strerror (errno));
			exit (1);
		}

		inptr = in;
		outptr = (char *)(out);
		inlen = sizeof (in);
		outlen = sizeof (out);
		while (iconv (cd, &inptr, &inlen, &outptr, &outlen) == (size_t) -1) {
			if (errno == EILSEQ) {
				/* byte is undefined in this charset: skip it */
				inptr++;
				inlen--;
			} else {
				fprintf (stderr, "%s\n", strerror (errno));
				exit (1);
			}
		}
		iconv_close (cd);

		/* Every charset in tables[] is an ASCII superset, so the
		 * whole 7bit range belongs to it no matter how many of
		 * the high bytes turned out to be undefined. */
		for (i = 0; i < 128; i++)
			encoding_map[i] |= bit;

		/* mark the code points the high bytes actually mapped to */
		nconv = (sizeof (out) - outlen) / sizeof (out[0]);
		for (i = 0; (size_t) i < nconv; i++) {
			if (out[i] < 256 * 256)
				encoding_map[out[i]] |= bit;
		}

		tables[j].bit = bit;
		bit <<= 1;
	}

	printf("/* This file is automatically generated: DO NOT EDIT */\n\n");

	/* one 256 entry table per (page, mask byte) that has any bit set */
	for (i=0;i<256;i++) {
		for (k=0;k<bytes;k++) {
			if (!page_uses_byte (i, k))
				continue;

			printf("static unsigned char m%02x%x[256] = {\n\t", i, k);
			for (j=0;j<256;j++) {
				printf("0x%02x, ", (encoding_map[i*256+j] >> (k*8)) & 0xff );
				if (((j+1)&7) == 0 && j<255)
					printf("\n\t");
			}
			printf("\n};\n\n");
		}
	}

	/* page index: pointer to each emitted table, 0 for unused ones */
	printf("struct {\n");
	for (k=0;k<bytes;k++) {
		printf("\tunsigned char *bits%d;\n", k);
	}
	printf("} camel_charmap[256] = {\n\t");
	for (i=0;i<256;i++) {
		printf("{ ");
		for (k=0;k<bytes;k++) {
			if (page_uses_byte (i, k)) {
				printf("m%02x%x, ", i, k);
			} else {
				printf("0, ");
			}
		}
		printf("}, ");
		if (((i+1)&7) == 0 && i<255)
			printf("\n\t");
	}
	printf("\n};\n\n");

	/* charset name <-> bit table, in priority order */
	printf("struct {\n\tconst char *name;\n\tunsigned int bit;\n} camel_charinfo[] = {\n");
	for (j=0;tables[j].name;j++) {
		printf("\t{ \"%s\", 0x%04x },\n", tables[j].name, tables[j].bit);
	}
	printf("};\n\n");

	/* lookup macro that reassembles the mask from its bytes */
	printf("#define charset_mask(x) \\\n");
	for (k=0;k<bytes;k++) {
		if (k!=0)
			printf("\t| ");
		else
			printf("\t");
		printf("(camel_charmap[(x)>>8].bits%d?camel_charmap[(x)>>8].bits%d[(x)&0xff]<<%d:0)", k, k, k*8);
		if (k<bytes-1)
			printf("\t\\\n");
	}
	printf("\n\n");

	return 0;
}

#else

#include "camel-charset-map.h"
#include "camel-charset-map-private.h"
#include "hash-table-utils.h"
#include <gal/unicode/gunicode.h>
#include <locale.h>
#include <string.h>
#include <ctype.h>
#include <glib.h>
#include <e-util/e-msgport.h>
#ifdef ENABLE_THREADS
#include <pthread.h>
#endif
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif

/* Debug hook: expands to nothing, so the printf() calls wrapped in
 * cd() below are compiled out. */
#define cd(x)			/* 'cache debug' */

/* Lock taken around accesses to the iconv_charsets hash table and the
 * iconv cache list in this file; both macros expand to nothing when
 * ENABLE_THREADS is not defined. */
#ifdef ENABLE_THREADS
static pthread_mutex_t iconv_charsets_lock = PTHREAD_MUTEX_INITIALIZER;
#define ICONV_CHARSETS_LOCK() pthread_mutex_lock (&iconv_charsets_lock)
#define ICONV_CHARSETS_UNLOCK() pthread_mutex_unlock (&iconv_charsets_lock)
#else
#define ICONV_CHARSETS_LOCK()
#define ICONV_CHARSETS_UNLOCK()
#endif /* ENABLE_THREADS */

/* One open iconv descriptor owned by a cache entry. */
struct _iconv_cache_node {
	EDListNode ln;		/* list linkage; must stay first, list nodes are cast directly to this struct */

	iconv_t ip;		/* the descriptor obtained from iconv_open() */
};

/* Cache entry for one conversion (one "to"/"from" charset pair). */
struct _iconv_cache {
	EDListNode ln;		/* linkage in iconv_cache_list; must stay first, list nodes are cast directly to this struct */

	char *conv;		/* lookup key: the iconv names of "to" and "from" concatenated */

	EDList inuse;		/* opened ic's in use  - if both these lists empty == failed to open conversion */
	EDList free;		/* opened ic's free */
};

/* Once the cache list holds more entries than this,
 * camel_charset_iconv_open() tries to flush entries that have no
 * descriptor in use before adding a new one. */
#define CAMEL_ICONV_CACHE_SIZE (16)

/* List of struct _iconv_cache; an entry is moved to the tail every
 * time it is looked up, so the head holds the least recently used. */
static EDList iconv_cache_list;

/* charset name -> iconv-friendly name; keys and values are g_strdup'd
 * strings.  NULL until camel_charset_map_init() has run. */
static GHashTable *iconv_charsets = NULL;
/* lower-cased codeset part of the locale name, or NULL if there is none */
static char *locale_charset = NULL;

/* Seed entries for the iconv_charsets hash table, copied in by
 * camel_charset_map_init().  The list ends at the NULL/NULL entry. */
struct {
	char *charset;
	char *iconv_name;
} known_iconv_charsets[] = {
	/* charset name, iconv-friendly charset name */
	{ "iso-8859-1",     "iso-8859-1" },
	{ "iso8859-1",      "iso-8859-1" },
	/* the above mostly serves as an example for iso-style charsets,
	   but we have code that will populate the iso-*'s if/when they
	   show up in camel_charset_to_iconv() so I'm
	   not going to bother putting them all in here... */
	{ "windows-cp1251", "cp1251"     },
	{ "windows-1251",   "cp1251"     },
	{ "cp1251",         "cp1251"     },
	/* the above mostly serves as an example for windows-style
	   charsets, but we have code that will parse and convert them
	   to their cp#### equivalents if/when they show up in
	   camel_charset_to_iconv() so I'm not going to bother
	   putting them all in here either... */
	{ "ks_c_5601-1987", "euc-kr"     },
	{ NULL,             NULL         }
};


/* g_hash_table_foreach() callback used at shutdown: releases the
 * strings of one iconv_charsets entry.  `data` is unused. */
static void
shutdown_foreach (gpointer key, gpointer value, gpointer data)
{
	g_free (value);
	g_free (key);
}

static void
flush_iconv_entry(struct _iconv_cache *ic)
{
	struct _iconv_cache_node *node;

	cd(printf("Flushing iconv cache entry: %s\n", ic->conv));

	while ( (node = (struct _iconv_cache_node *)e_dlist_remhead(&ic->inuse)) ) {
		iconv_close(node->ip);
		g_free(node);
	}
	while ( (node = (struct _iconv_cache_node *)e_dlist_remhead(&ic->free)) ) {
		iconv_close(node->ip);
		g_free(node);
	}
	g_free(ic->conv);
	g_free(ic);
}

/* Exit handler registered by camel_charset_map_init(): frees the
 * charset alias table, the locale charset string and every entry of
 * the iconv cache list. */
static void
camel_charset_map_shutdown (void)
{
	struct _iconv_cache *entry, *following;

	g_hash_table_foreach (iconv_charsets, shutdown_foreach, NULL);
	g_hash_table_destroy (iconv_charsets);
	g_free (locale_charset);

	/* the successor is fetched before the entry is freed; the walk
	 * stops at the node whose next pointer is NULL */
	for (entry = (struct _iconv_cache *)iconv_cache_list.head;
	     (following = (struct _iconv_cache *)entry->ln.next) != NULL;
	     entry = following)
		flush_iconv_entry(entry);
}

/* One-time initialisation of this module; later calls return at once.
 *
 * Fills the iconv_charsets alias table from known_iconv_charsets[],
 * initialises the iconv cache list, works out the charset of the
 * current locale (see camel_charset_locale_name()) and registers
 * camel_charset_map_shutdown() to run at exit.
 */
void
camel_charset_map_init (void)
{
	const char *locale;
	int i;
	
	if (iconv_charsets)
		return;
	
	iconv_charsets = g_hash_table_new (g_strcase_hash, g_strcase_equal);
	for (i = 0; known_iconv_charsets[i].charset != NULL; i++) {
		g_hash_table_insert (iconv_charsets, g_strdup (known_iconv_charsets[i].charset),
				     g_strdup (known_iconv_charsets[i].iconv_name));
	}
	
	e_dlist_init(&iconv_cache_list);
	
	locale = setlocale (LC_ALL, NULL);
	
	if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
		/* The locale "C"  or  "POSIX"  is  a  portable  locale;  its
		 * LC_CTYPE  part  corresponds  to  the 7-bit ASCII character
		 * set.
		 */
		
		locale_charset = NULL;
	} else {
		/* A locale name is typically of  the  form  language[_terri-
		 * tory][.codeset][@modifier],  where  language is an ISO 639
		 * language code, territory is an ISO 3166 country code,  and
		 * codeset  is  a  character  set or encoding identifier like
		 * ISO-8859-1 or UTF-8.
		 */
		const char *codeset;
		
		codeset = strchr (locale, '.');
		if (codeset != NULL) {
			codeset++;
			/* The codeset runs up to the "@modifier", or up to a
			 * '/' (a hack for Solaris systems), or to the end of
			 * the name.  Copy exactly that many characters so no
			 * part of the modifier leaks into the charset name.
			 */
			locale_charset = g_strndup (codeset, strcspn (codeset, "@/"));
			g_strdown (locale_charset);
		}
	}
	
	g_atexit (camel_charset_map_shutdown);
}

/* Resets a charset accumulator: level 0 (nothing above 7bit seen yet)
 * and a mask with every charset bit still set. */
void
camel_charset_init (CamelCharset *c)
{
	c->level = 0;
	c->mask = ~0;
}

/* Feeds `len` bytes of UTF-8 text at `in` into the accumulator `c`.
 *
 * For every valid character the set of candidate charsets in c->mask
 * is narrowed with charset_mask(), and c->level is raised to 1 for
 * characters in 128..255 and to 2 for anything above that.
 * Characters beyond 0xffff clear the mask altogether.  Invalid input
 * is skipped one byte at a time.
 */
void
camel_charset_step (CamelCharset *c, const char *in, int len)
{
	const char *cur = in;
	const char *end = in + len;
	unsigned int bits = c->mask;
	int lvl = c->level;

	/* check what charset a given string will fit in */
	while (cur < end) {
		const char *next = g_utf8_next_char(cur);
		gunichar uc = g_utf8_get_char(cur);

		if (next == NULL || !g_unichar_validate (uc)) {
			/* not a valid character: resync on the next byte */
			cur++;
			continue;
		}
		cur = next;

		if (uc > 0xffff) {
			/* outside the range covered by the charset tables */
			bits = 0;
			lvl = MAX(lvl, 2);
			continue;
		}

		bits &= charset_mask(uc);

		if (uc >= 256)
			lvl = MAX(lvl, 2);
		else if (uc >= 128)
			lvl = MAX(lvl, 1);
	}

	c->mask = bits;
	c->level = lvl;
}

/* Returns the name of the first charset in camel_charinfo[] whose bit
 * is set in `mask`, or "UTF-8" if none is. */
static const char *
camel_charset_best_mask(unsigned int mask)
{
	const size_t count = sizeof(camel_charinfo)/sizeof(camel_charinfo[0]);
	size_t idx;

	for (idx = 0; idx < count; idx++) {
		if ((camel_charinfo[idx].bit & mask) != 0)
			return camel_charinfo[idx].name;
	}

	return "UTF-8";
}

/* Maps an accumulator to a charset name: level 2 picks the best
 * charset left in the mask, level 1 is "ISO-8859-1", and any other
 * level yields NULL. */
const char *
camel_charset_best_name (CamelCharset *charset)
{
	if (charset->level == 2)
		return camel_charset_best_mask (charset->mask);

	if (charset->level == 1)
		return "ISO-8859-1";

	return NULL;
}

/* Convenience wrapper: runs a fresh accumulator over the `len` bytes
 * at `in` and returns the smallest charset that fits them.  A NULL
 * result means US-ASCII. */
const char *
camel_charset_best (const char *in, int len)
{
	CamelCharset state;

	camel_charset_init (&state);
	camel_charset_step (&state, in, len);

	return camel_charset_best_name (&state);
}

/* Returns the charset recorded for the current locale by
 * camel_charset_map_init(), or NULL if none was recorded.  The string
 * belongs to this module. */
const char *
camel_charset_locale_name (void)
{
	const char *name = locale_charset;

	return name;
}

/* Translates a charset name into the name that should be handed to
 * iconv_open().
 *
 * Known aliases come from the iconv_charsets table.  Unknown names are
 * normalised once and the result is remembered in the table:
 *   - "isoXXXX" (no '-' or '_' after "iso") becomes "iso-XXXX";
 *   - "windows-NNNN" / "windows-cpNNNN" (digits only) become "cpNNNN";
 *   - everything else is kept as it is.
 *
 * Returns NULL for a NULL name; otherwise a string owned by the table,
 * which the caller must not free.
 */
const char *
camel_charset_to_iconv (const char *name)
{
	const char *charset;
	
	if (name == NULL)
		return NULL;
	
	ICONV_CHARSETS_LOCK ();
	charset = g_hash_table_lookup (iconv_charsets, name);
	if (!charset) {
		/* Attempt to friendlyify the charset */
		char *new_charset;
		
		if (!g_strncasecmp (name, "iso", 3) && name[3] != '-' && name[3] != '_') {
			/* Hack to convert charsets like ISO8859-1 to iconv-friendly ISO-8859-1 */
			new_charset = g_strdup_printf ("%.3s-%s", name, name + 3);
		} else if (!g_strncasecmp (name, "windows-", 8)) {
			/* Convert charsets like windows-1251 and windows-cp1251 to iconv-friendly cp1251 */
			const char *digits = name + 8;
			const char *p;
			
			if (!g_strncasecmp (digits, "cp", 2))
				digits += 2;
			
			/* isdigit() needs an unsigned char value: a plain char
			 * may be negative for 8bit input */
			for (p = digits; isdigit ((unsigned char) *p); p++)
				;
			
			/* only rewrite when there is a code page number and
			 * nothing but the number */
			if (p > digits && *p == '\0')
				new_charset = g_strdup_printf ("cp%s", digits);
			else
				new_charset = g_strdup (name);
		} else {
			/* *shrug* - add it to the hash table just the way it is? */
			new_charset = g_strdup (name);
		}
		
		g_hash_table_insert (iconv_charsets, g_strdup (name), new_charset);
		charset = new_charset;
	}
	ICONV_CHARSETS_UNLOCK ();
	
	return charset;
}

/* Cached replacement for iconv_open().
 *
 * `oto` and `ofrom` are charset names; they are first normalised with
 * camel_charset_to_iconv().  Descriptors are kept per conversion in
 * iconv_cache_list, and a descriptor handed back with
 * camel_charset_iconv_close() is reused instead of opening a new one.
 * A conversion that could not be opened is remembered as well, so the
 * failure is returned without calling iconv_open() again.
 *
 * Returns a descriptor in its initial conversion state, or
 * (iconv_t)-1 on failure.  On the NULL-name and cached-failure paths
 * errno is set to EINVAL; when iconv_open() itself fails, errno is
 * whatever it left.  Release the result with
 * camel_charset_iconv_close(), not iconv_close().
 */
iconv_t camel_charset_iconv_open(const char *oto, const char *ofrom)
{
	const char *to, *from;
	char *tofrom;
	struct _iconv_cache *ic, *icnew = NULL;
	struct _iconv_cache_node *node;
	iconv_t ip;

	/* camel_charset_to_iconv() passes NULL through, which would
	 * crash the strlen() calls below */
	if (oto == NULL || ofrom == NULL) {
		errno = EINVAL;
		return (iconv_t)-1;
	}

	to = camel_charset_to_iconv(oto);
	from = camel_charset_to_iconv(ofrom);
	tofrom = alloca(strlen(to) +strlen(from) + 1);
	sprintf(tofrom, "%s%s", to, from);

	ICONV_CHARSETS_LOCK();

	/* look for an existing entry for this conversion; the walk ends
	 * on the node whose next pointer is NULL */
	ic = (struct _iconv_cache *)iconv_cache_list.head;
	while (ic->ln.next) {
		if (!strcasecmp(ic->conv, tofrom))
			break;
		ic = (struct _iconv_cache *)ic->ln.next;
	}

	if (ic->ln.next == NULL) {
		int extra = e_dlist_length(&iconv_cache_list) - CAMEL_ICONV_CACHE_SIZE;
		struct _iconv_cache *old = (struct _iconv_cache *)iconv_cache_list.head,
			*next = (struct _iconv_cache *)old->ln.next;

		/* flush any 'old' entries out, if we can */
		while (extra>0 && next) {
			if (e_dlist_empty(&old->inuse)) {
				e_dlist_remove(&old->ln);
				flush_iconv_entry(old);
				extra--;
			}
			old = next;
			next = (struct _iconv_cache *)old->ln.next;
		}

		icnew = ic = g_malloc(sizeof(*ic));
		e_dlist_init(&ic->inuse);
		e_dlist_init(&ic->free);
		ic->conv = g_strdup(tofrom);
	} else {
		/* unlink; it is re-added at the tail below */
		e_dlist_remove(&ic->ln);
	}

	node = (struct _iconv_cache_node *)e_dlist_remhead(&ic->free);
	if (node) {
		size_t buflen = 0;
		char *buffer = NULL;

		cd(printf("Returning cached success of: %s to %s\n", from, to));
		e_dlist_addhead(&ic->inuse, &node->ln);
		ip = node->ip;

		/* A recycled descriptor still carries whatever state its
		 * previous user left behind; put it back into the initial
		 * state.  Non-NULL length pointers are passed because not
		 * every iconv implementation copes with NULL there. */
		iconv(ip, NULL, &buflen, &buffer, &buflen);
	} else {
		if (e_dlist_empty(&ic->inuse) && icnew == NULL) {
			cd(printf("returning cached failure of conversion: %s to %s\n", from, to));
			ip = (iconv_t)-1;
			errno = EINVAL;
		} else {
			ip = iconv_open(to, from);
			if (ip != (iconv_t)-1) {
				cd(printf("Creating cached opening of: %s to %s = %p\n", from, to, ip));
				node = g_malloc(sizeof(*node));
				node->ip = ip;
				e_dlist_addhead(&ic->inuse, &node->ln);
			}
		}
	}

	/* most recently used entries live at the tail */
	e_dlist_addtail(&iconv_cache_list, &ic->ln);

	ICONV_CHARSETS_UNLOCK();

	return ip;
}

/* Hands a descriptor obtained from camel_charset_iconv_open() back to
 * the cache: its node is moved from the owning entry's in-use list to
 * the front of that entry's free list.  (iconv_t)-1 is ignored; a
 * descriptor the cache does not know about only produces a warning.
 */
void camel_charset_iconv_close(iconv_t ip)
{
	struct _iconv_cache *entry;
	struct _iconv_cache_node *cur;

	if (ip == (iconv_t)-1)
		return;

	ICONV_CHARSETS_LOCK();

	/* walk backwards from the tail, i.e. most recently used first */
	for (entry = (struct _iconv_cache *)iconv_cache_list.tailpred;
	     entry->ln.prev;
	     entry = (struct _iconv_cache *)entry->ln.prev) {
		cd(printf("closing iconv %p, checking against name '%s'\n", ip, entry->conv));

		for (cur = (struct _iconv_cache_node *)entry->inuse.head;
		     cur->ln.next;
		     cur = (struct _iconv_cache_node *)cur->ln.next) {
			cd(printf("closing iconv %p, checking against node '%p'\n", ip, cur->ip));
			if (cur->ip != ip)
				continue;

			e_dlist_remove(&cur->ln);
			e_dlist_addhead(&entry->free, &cur->ln);
			ICONV_CHARSETS_UNLOCK();
			return;
		}
	}

	ICONV_CHARSETS_UNLOCK();

	g_warning("Trying to close iconv i dont know about: %p", ip);
}

#endif /* !BUILD_MAP */