#!/usr/bin/perl

#
# po2c - Converts .po files to C code
#
# Copyright (C) 2004	  Angel Ortega <angel@triptico.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
# http://www.triptico.com
#

# version string, embedded in the banner of the generated C file
$VERSION = "1.0.2";

# Without arguments there is nothing to convert. The usage line goes to
# STDERR because STDOUT carries the generated C code and is normally
# redirected into a source file, where a usage message would end up
# being compiled.
unless(@ARGV)
{
	print STDERR "Usage: po2c {po file[s]}\n";
	exit 1;
}

# %msgs:   language name => reference to that language's msgid => msgstr hash
# %msgids: every msgid seen in any file (the values are replaced by array
#          indexes in stage 2)
%msgs = ();
%msgids = ();

# stage 1: loading

# arguments are .po files
foreach my $f (@ARGV)
{
	# the language name is the base name of the file without the .po
	# extension; arguments not ending in .po are skipped
	my ($lang) = ($f =~ /([^\/]+)\.po$/);
	next unless defined($lang);

	my ($fh);

	unless(open($fh, '<', $f))
	{
		# report it: otherwise the language would silently be missing
		# from the generated code
		warn "po2c: cannot open '$f': $!\n";
		next;
	}

	my (%a);		# msgid => msgstr for this file
	my ($msgid, $msgstr);	# the entry being collected
	my ($target);		# reference to the string that continuation
				# lines extend, or undef to discard them

	# stores the collected entry, provided both halves were seen
	my $flush = sub {
		return unless defined($msgid) and defined($msgstr);

		$a{$msgid} = $msgstr;
		$msgids{$msgid} ++;
	};

	while(my $line = <$fh>)
	{
		chomp($line);

		# ignore blank lines or comments
		next if $line =~ /^$/ or $line =~ /^#/;

		if($line =~ /^msgid\s+\"(.*)\"\s*$/)
		{
			my ($text) = $1;

			# a new entry starts: store the previous one
			$flush->();

			$msgid = $text;
			$msgstr = undef;
			$target = \$msgid;
		}
		elsif($line =~ /^msgstr\s+\"(.*)\"\s*$/)
		{
			# start of msgstr
			$msgstr = $1;
			$target = \$msgstr;
		}
		elsif($line =~ /^\"(.*)\"\s*$/)
		{
			# add to current value
			$$target .= $1 if defined($target);
		}
		else
		{
			# msgctxt, msgid_plural, msgstr[n] and anything else are
			# not supported: drop their continuation lines instead
			# of gluing them to the previous string
			$target = undef;
		}
	}

	# store the last entry of the file
	$flush->();

	close($fh);

	# add to the global message pool
	$msgs{$lang} = \%a;
}

# stage 2: convert the data

# @msgids receives every known msgid in sorted order
@msgids = sort keys %msgids;

# %msgids is then rewritten so that each msgid maps to its position in
# @msgids (hash slice assignment: keys from @msgids, values 0 .. last index)
@msgids{@msgids} = (0 .. $#msgids);

# stage 3: dump as C code

# banner, plus the headers needed by the runtime code emitted at the end
print "/* generated by po2c $VERSION - Do not modify */\n\n",
	"#include <stdio.h>\n",
	"#include <string.h>\n\n";

# the table of msgids comes first: one string per line, annotated with
# its index, closed by a NULL sentinel
print "static const char * _po2c_msgids[] = {\n";

foreach my $idx (0 .. $#msgids)
{
	print "\t/* $idx */ \"$msgids[$idx]\",\n";
}

print "\tNULL\n};\n\n";

# the record type used by the per-language tables: an index into
# _po2c_msgids and the translated string
print "struct _po2c_msg {\n",
	"\tint msgid;\n",
	"\tconst char * msgstr;\n",
	"};\n\n";

# dump now each language

# Decodes the C escape sequences found in a .po string and returns the
# bytes the compiled string literal will contain. The generated
# po2c_gettext() binary-searches with strcmp() on those bytes, so the
# tables must be ordered on the decoded text and not on the escaped one.
sub _po2c_c_unescape
{
	my ($s) = @_;
	my (%simple) = (
		'n' => "\n", 't' => "\t", 'r' => "\r", 'a' => "\a",
		'b' => "\b", 'f' => "\f", 'v' => "\013"
	);

	# hex escape, octal escape or single-character escape; unknown
	# single-character escapes (\\, \", \', \?) stand for themselves
	$s =~ s/\\(?:x([0-9A-Fa-f]+)|([0-7]{1,3})|(.))/
		defined($1) ? chr(hex($1) & 0xff) :
		defined($2) ? chr(oct($2) & 0xff) :
		exists($simple{$3}) ? $simple{$3} : $3/ge;

	return $s;
}

# languages are dumped in sorted order so that the output (and the
# result of the partial language match done at run time) is reproducible
foreach my $l (sort(keys(%msgs)))
{
	# language names such as sr@latin or pt-BR are not valid inside a
	# C identifier: replace the offending characters
	(my $ident = $l) =~ tr/A-Za-z0-9_/_/c;

	print "static struct _po2c_msg _po2c_lang_${ident}\[\] = {\n";

	# get the translation table for the language $l
	my ($m) = $msgs{$l};

	# sort keys: each msgid as strcmp() will see it
	my (%ckey);

	foreach my $msgid (keys(%$m))
	{
		$ckey{$msgid} = _po2c_c_unescape($msgid);
	}

	foreach my $msgid (sort { $ckey{$a} cmp $ckey{$b} or $a cmp $b } keys(%$m))
	{
		my ($msgstr) = $m->{$msgid};

		# untranslated entries are left out; the test is on length so
		# that a translation of "0" is kept
		next unless defined($msgstr) and length($msgstr);

		# make it 7-bit safe
		$msgstr =~ s/([^\x00-\x7f])/sprintf("\\%o", ord($1))/ge;

		print "\t{ " . $msgids{$msgid} . ", \"" . $msgstr . "\" },\n";
	}

	print "\t{ -1, NULL }\n};\n\n";
}

# finally, dump the languages

print "static struct {\n";
print "\tconst char * lang;\n";
print "\tconst char * charset;\n";
print "\tstruct _po2c_msg * msgs;\n";
print "} _po2c_langs[] = {\n";

foreach my $l (sort(keys(%msgs)))
{
	# same identifier mangling as in the per-language tables
	(my $ident = $l) =~ tr/A-Za-z0-9_/_/c;

	# the charset is taken from the .po header (the msgstr of the
	# empty msgid); it runs up to the backslash of the trailing \n
	my ($header) = $msgs{$l}->{""};
	my ($charset) = defined($header) ?
		($header =~ /charset=([^\\]+)/) : ();

	# no header or no charset in it: use the same default that
	# po2c_getcharset() reports when no language is active
	$charset = "ASCII" unless defined($charset);

	print "\t{ \"" . $l . "\", \"" . $charset . "\", _po2c_lang_${ident} },\n";
}

print "\t{ NULL, NULL, NULL }\n};\n\n";

# The rest of the output is fixed C code, printed verbatim from a
# non-interpolating heredoc. It provides:
#
#   po2c_setlang(lang)    selects the table whose name equals lang or,
#                         failing that, shares its first two characters;
#                         NULL or "" deactivates translation
#   po2c_gettext(msgid)   binary-searches the active table and returns
#                         the translation, or msgid itself if not found
#   po2c_getcharset()     charset of the active language, or "ASCII"
#   po2c_getnumlangs()    number of compiled-in languages
#   po2c_getlang(num)     name of the language at index num, or NULL if
#                         num is out of range
print "/* code */\n";
print <<'EOF';

static struct _po2c_msg * _po2c_lang=NULL;
static int _po2c_lang_size=0;
static const char * _po2c_charset=NULL;

void po2c_setlang(const char * lang)
{
	int n;

	_po2c_lang=NULL;
	_po2c_lang_size=0;
	_po2c_charset=NULL;

	/* if lang is NULL or "", deactivate it */
	if(lang == NULL || *lang == '\0')
		return;

	/* searches for a valid language array */
	for(n=0;_po2c_lang == NULL && _po2c_langs[n].lang != NULL;n++)
	{
		if(strcmp(lang, _po2c_langs[n].lang) == 0) {
			_po2c_lang=_po2c_langs[n].msgs;
			_po2c_charset=_po2c_langs[n].charset;
		}
	}

	/* try partial searches */
	for(n=0;_po2c_lang == NULL && _po2c_langs[n].lang != NULL;n++)
	{
		if(strncmp(lang, _po2c_langs[n].lang, 2) == 0) {
			_po2c_lang=_po2c_langs[n].msgs;
			_po2c_charset=_po2c_langs[n].charset;
		}
	}

	/* if found, count entries */
	if(_po2c_lang != NULL)
	{
		struct _po2c_msg * m;

		for(m=_po2c_lang;m->msgid != -1;m++)
			_po2c_lang_size++;
	}
}

const char * po2c_gettext(const char * msgid)
{
	struct _po2c_msg * m;
	int b, t, n, c;

	/* if no language is set or msgid is NULL or empty, return msgid as is */
	if(_po2c_lang == NULL || msgid == NULL || *msgid == '\0')
		return(msgid);

	/* binary-search for the msgid */
	b=0; t=_po2c_lang_size - 1;

	while(t >= b)
	{
		n=(b + t) / 2;
		m=&_po2c_lang[n];

		c=strcmp(msgid, _po2c_msgids[m->msgid]);

		if(c == 0)
			return(m->msgstr);
		else
		if(c < 0)
			t=n - 1;
		else
			b=n + 1;
	}

	return(msgid);
}

const char * po2c_getcharset(void)
{
	if (_po2c_charset)
		return _po2c_charset;
	else
		return "ASCII";
}

int po2c_getnumlangs(void)
{
	int n = 0;
	while (_po2c_langs[n].lang)
		n++;
	
	return n;
}

const char * po2c_getlang(int num)
{
	/* out-of-range indexes return NULL instead of reading past the table */
	if (num < 0 || num >= po2c_getnumlangs())
		return NULL;

	return _po2c_langs[num].lang;
}
EOF

exit 0;