From 659468ebf0197e682cc70629c3b538e793e765a2 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 5 Mar 2007 09:37:13 +0100 Subject: [PATCH] add php-gettext, i18n stuff --- accept-to-gettext.php | 184 +++++++++++++++++++++ functions.php | 16 ++ gettext/README | 189 ++++++++++++++++++++++ gettext/gettext.inc | 318 ++++++++++++++++++++++++++++++++++++ gettext/gettext.php | 358 +++++++++++++++++++++++++++++++++++++++++ gettext/streams.php | 166 +++++++++++++++++++ index.php | 2 +- prefs.php | 3 + tt-rss.php | 5 +- update-translations.sh | 19 +++ 10 files changed, 1258 insertions(+), 2 deletions(-) create mode 100644 accept-to-gettext.php create mode 100644 gettext/README create mode 100644 gettext/gettext.inc create mode 100644 gettext/gettext.php create mode 100644 gettext/streams.php create mode 100644 update-translations.sh diff --git a/accept-to-gettext.php b/accept-to-gettext.php new file mode 100644 index 000000000..6f366d06a --- /dev/null +++ b/accept-to-gettext.php @@ -0,0 +1,184 @@ + + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Usage: + * + * $locale=al2gt(, + * ); + * setlocale('LC_ALL', $locale); // or 'LC_MESSAGES', or whatever... + * + * Example: + * + * $langs=array('nl_BE.ISO-8859-15','nl_BE.UTF-8','en_US.UTF-8','en_GB.UTF-8'); + * $locale=al2gt($langs, 'text/html'); + * setlocale('LC_ALL', $locale); + * + * Note that this will send out header information (to be + * RFC2616-compliant), so it must be called before anything is sent to + * the user. + * + * Assumptions made: + * * Charset encodings are written the same way as the Accept-Charset + * HTTP header specifies them (RFC2616), except that they're parsed + * case-insensitive. + * * Country codes and language codes are the same in both gettext and + * the Accept-Language syntax (except for the case differences, which + * are dealt with easily). If not, some input may be ignored. + * * The provided gettext-strings are fully qualified; i.e., no "en_US"; + * always "en_US.ISO-8859-15" or "en_US.UTF-8", or whichever has been + * used. "en.ISO-8859-15" is OK, though. + * * The language is more important than the charset; i.e., if the + * following is given: + * + * Accept-Language: nl-be, nl;q=0.8, en-us;q=0.5, en;q=0.3 + * Accept-Charset: ISO-8859-15, utf-8;q=0.5 + * + * And the supplied parameter contains (amongst others) nl_BE.UTF-8 + * and nl.ISO-8859-15, then nl_BE.UTF-8 will be picked. + * + * $Log: accept-to-gettext.inc,v $ + * Revision 1.1.1.1 2003/11/19 19:31:15 wouter + * * moved to new CVS repo after death of the old + * * Fixed code to apply a default to both Accept-Charset and + * Accept-Language if none of those headers are supplied; patch from + * Dominic Chambers + * + * Revision 1.2 2003/08/14 10:23:59 wouter + * Removed little error in Content-Type header syntaxis. + * + */ + +/* not really important, this one; perhaps I could've put it inline with + * the rest. */ +function find_match($curlscore,$curcscore,$curgtlang,$langval,$charval, + $gtlang) +{ + if($curlscore < $langval) { + $curlscore=$langval; + $curcscore=$charval; + $curgtlang=$gtlang; + } else if ($curlscore == $langval) { + if($curcscore < $charval) { + $curcscore=$charval; + $curgtlang=$gtlang; + } + } + return array($curlscore, $curcscore, $curgtlang); +} + +function al2gt($gettextlangs, $mime) { + /* default to "everything is acceptable", as RFC2616 specifies */ + $acceptLang=(($_SERVER["HTTP_ACCEPT_LANGUAGE"] == '') ? '*' : + $_SERVER["HTTP_ACCEPT_LANGUAGE"]); + $acceptChar=(($_SERVER["HTTP_ACCEPT_CHARSET"] == '') ? '*' : + $_SERVER["HTTP_ACCEPT_CHARSET"]); + $alparts=@preg_split("/,/",$acceptLang); + $acparts=@preg_split("/,/",$acceptChar); + + /* Parse the contents of the Accept-Language header.*/ + foreach($alparts as $part) { + $part=trim($part); + if(preg_match("/;/", $part)) { + $lang=@preg_split("/;/",$part); + $score=@preg_split("/=/",$lang[1]); + $alscores[$lang[0]]=$score[1]; + } else { + $alscores[$part]=1; + } + } + + /* Do the same for the Accept-Charset header. */ + + /* RFC2616: ``If no "*" is present in an Accept-Charset field, then + * all character sets not explicitly mentioned get a quality value of + * 0, except for ISO-8859-1, which gets a quality value of 1 if not + * explicitly mentioned.'' + * + * Making it 2 for the time being, so that we + * can distinguish between "not specified" and "specified as 1" later + * on. */ + $acscores["ISO-8859-1"]=2; + + foreach($acparts as $part) { + $part=trim($part); + if(preg_match("/;/", $part)) { + $cs=@preg_split("/;/",$part); + $score=@preg_split("/=/",$cs[1]); + $acscores[strtoupper($cs[0])]=$score[1]; + } else { + $acscores[strtoupper($part)]=1; + } + } + if($acscores["ISO-8859-1"]==2) { + $acscores["ISO-8859-1"]=(isset($acscores["*"])?$acscores["*"]:1); + } + + /* + * Loop through the available languages/encodings, and pick the one + * with the highest score, excluding the ones with a charset the user + * did not include. + */ + $curlscore=0; + $curcscore=0; + $curgtlang=NULL; + foreach($gettextlangs as $gtlang) { + + $tmp1=preg_replace("/\_/","-",$gtlang); + $tmp2=@preg_split("/\./",$tmp1); + $allang=strtolower($tmp2[0]); + $gtcs=strtoupper($tmp2[1]); + $noct=@preg_split("/-/",$allang); + + $testvals=array( + array($alscores[$allang], $acscores[$gtcs]), + array($alscores[$noct[0]], $acscores[$gtcs]), + array($alscores[$allang], $acscores["*"]), + array($alscores[$noct[0]], $acscores["*"]), + array($alscores["*"], $acscores[$gtcs]), + array($alscores["*"], $acscores["*"])); + + $found=FALSE; + foreach($testvals as $tval) { + if(!$found && isset($tval[0]) && isset($tval[1])) { + $arr=find_match($curlscore, $curcscore, $curgtlang, $tval[0], + $tval[1], $gtlang); + $curlscore=$arr[0]; + $curcscore=$arr[1]; + $curgtlang=$arr[2]; + $found=TRUE; + } + } + } + + /* We must re-parse the gettext-string now, since we may have found it + * through a "*" qualifier.*/ + + $gtparts=@preg_split("/\./",$curgtlang); + $tmp=strtolower($gtparts[0]); + $lang=preg_replace("/\_/", "-", $tmp); + $charset=$gtparts[1]; + + header("Content-Language: $lang"); + header("Content-Type: $mime; charset=$charset"); + + return $curgtlang; +} + +?> diff --git a/functions.php b/functions.php index f714abc4a..2e97ddb2c 100644 --- a/functions.php +++ b/functions.php @@ -18,6 +18,9 @@ require_once "magpierss/rss_fetch.inc"; require_once 'magpierss/rss_utils.inc'; + require_once "accept-to-gettext.php"; + require_once "gettext/gettext.inc"; + function purge_feed($link, $feed_id, $purge_interval, $debug = false) { $rows = -1; @@ -3184,4 +3187,17 @@ $msg"; } + function startup_gettext() { + + # Get locale from Accept-Language header + $lang = al2gt(array("en_US", "ru_RU"), "text/html"); + + if ($lang) { + _setlocale(LC_MESSAGES, $lang); + _bindtextdomain("messages", "locale"); + _textdomain("messages"); + _bind_textdomain_codeset("messages", "UTF-8"); + } + } + ?> diff --git a/gettext/README b/gettext/README new file mode 100644 index 000000000..c7525e29c --- /dev/null +++ b/gettext/README @@ -0,0 +1,189 @@ +PHP-gettext 1.0 + +Copyright 2003, 2006 -- Danilo "angry with PHP[1]" Segan +Licensed under GPLv2 (or any later version, see COPYING) + +[1] PHP is actually cyrillic, and translates roughly to + "works-doesn't-work" (UTF-8: Ради-Не-Ради) + + +Introduction + + How many times did you look for a good translation tool, and + found out that gettext is best for the job? Many times. + + How many times did you try to use gettext in PHP, but failed + miserably, because either your hosting provider didn't support + it, or the server didn't have adequate locale? Many times. + + Well, this is a solution to your needs. It allows using gettext + tools for managing translations, yet it doesn't require gettext + library at all. It parses generated MO files directly, and thus + might be a bit slower than the (maybe provided) gettext library. + + PHP-gettext is a simple reader for GNU gettext MO files. Those + are binary containers for translations, produced by GNU msgfmt. + +Why? + + I got used to having gettext work even without gettext + library. It's there in my favourite language Python, so I was + surprised that I couldn't find it in PHP. I even Googled for it, + but to no avail. + + So, I said, what the heck, I'm going to write it for this + disguisting language of PHP, because I'm often constrained to it. + +Features + + o Support for simple translations + Just define a simple alias for translate() function (suggested + use of _() or gettext(); see provided example). + + o Support for ngettext calls (plural forms, see a note under bugs) + You may also use plural forms. Translations in MO files need to + provide this, and they must also provide "plural-forms" header. + Please see 'info gettext' for more details. + + o Support for reading straight files, or strings (!!!) + Since I can imagine many different backends for reading in the MO + file data, I used imaginary abstract class StreamReader to do all + the input (check streams.php). For your convenience, I've already + provided two classes for reading files: FileReader and + StringReader (CachedFileReader is a combination of the two: it + loads entire file contents into a string, and then works on that). + See example below for usage. You can for instance use StringReader + when you read in data from a database, or you can create your own + derivative of StreamReader for anything you like. + + +Bugs + + Plural-forms field in MO header (translation for empty string, + i.e. "") is treated according to PHP syntactic rules (it's + eval()ed). Since these should actually follow C syntax, there are + some problems. + + For instance, I'm used to using this: + Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : \ + n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2; + but it fails with PHP (it sets $plural=2 instead of 0 for $n==1). + + The fix is usually simple, but I'm lazy to go into the details of + PHP operator precedence, and maybe try to fix it. In here, I had + to put everything after the first ':' in parenthesis: + Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : \ + (n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2); + That works, and I'm satisfied. + + Besides this one, there are probably a bunch of other bugs, since + I hate PHP (did I mention it already? no? strange), and don't + know it very well. So, feel free to fix any of those and report + them back to me at . + +Usage + + Put files streams.php and gettext.php somewhere you can load them + from, and require 'em in where you want to use them. + + Then, create one 'stream reader' (a class that provides functions + like read(), seekto(), currentpos() and length()) which will + provide data for the 'gettext_reader', with eg. + $streamer = new FileStream('data.mo'); + + Then, use that as a parameter to gettext_reader constructor: + $wohoo = new gettext_reader($streamer); + + If you want to disable pre-loading of entire message catalog in + memory (if, for example, you have a multi-thousand message catalog + which you'll use only occasionally), use "false" for second + parameter to gettext_reader constructor: + $wohoo = new gettext_reader($streamer, false); + + From now on, you have all the benefits of gettext data at your + disposal, so may run: + print $wohoo->translate("This is a test"); + print $wohoo->ngettext("%d bird", "%d birds", $birds); + + You might need to pass parameter "-k" to xgettext to make it + extract all the strings. In above example, try with + xgettext -ktranslate -kngettext:1,2 file.php + what should create messages.po which contains two messages for + translation. + + I suggest creating simple aliases for these functions (see + example/pigs.php for how do I do it, which means it's probably a + bad way). + + +Usage with gettext.inc (standard gettext interfaces emulation) + + Check example in examples/pig_dropin.php, basically you include + gettext.inc and use all the standard gettext interfaces as + documented on: + + http://www.php.net/gettext + + The only catch is that you can check return value of setlocale() + to see if your locale is system supported or not. + + +Example + + See in examples/ subdirectory. There are a couple of files. + pigs.php is an example, serbian.po is a translation to Serbian + language, and serbian.mo is generated with + msgfmt -o serbian.mo serbian.po + There is also simple "update" script that can be used to generate + POT file and to update the translation using msgmerge. + +Interesting TODO: + + o Try to parse "plural-forms" header field, and to follow C syntax + rules. This won't be easy. + +Boring TODO: + + o Learn PHP and fix bugs, slowness and other stuff resulting from + my lack of knowledge (but *maybe*, it's not my knowledge that is + bad, but PHP itself ;-). + + (This is mostly done thanks to Nico Kaiser.) + + o Try to use hash tables in MO files: with pre-loading, would it + be useful at all? + +Never-asked-questions: + + o Why did you mark this as version 1.0 when this is the first code + release? + + Well, it's quite simple. I consider that the first released thing + should be labeled "version 1" (first, right?). Zero is there to + indicate that there's zero improvement and/or change compared to + "version 1". + + I plan to use version numbers 1.0.* for small bugfixes, and to + release 1.1 as "first stable release of version 1". + + This may trick someone that this is actually useful software, but + as with any other free software, I take NO RESPONSIBILITY for + creating such a masterpiece that will smoke crack, trash your + hard disk, and make lasers in your CD device dance to the tune of + Mozart's 40th Symphony (there is one like that, right?). + + o Can I...? + + Yes, you can. This is free software (as in freedom, free speech), + and you might do whatever you wish with it, provided you do not + limit freedom of others (GPL). + + I'm considering licensing this under LGPL, but I *do* want + *every* PHP-gettext user to contribute and respect ideas of free + software, so don't count on it happening anytime soon. + + I'm sorry that I'm taking away your freedom of taking others' + freedom away, but I believe that's neglible as compared to what + freedoms you could take away. ;-) + + Uhm, whatever. diff --git a/gettext/gettext.inc b/gettext/gettext.inc new file mode 100644 index 000000000..eb94b256a --- /dev/null +++ b/gettext/gettext.inc @@ -0,0 +1,318 @@ + + + Drop in replacement for native gettext. + + This file is part of PHP-gettext. + + PHP-gettext is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + PHP-gettext is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PHP-gettext; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +/* +LC_CTYPE 0 +LC_NUMERIC 1 +LC_TIME 2 +LC_COLLATE 3 +LC_MONETARY 4 +LC_MESSAGES 5 +LC_ALL 6 +*/ + +require('streams.php'); +require('gettext.php'); + + +// Variables + +global $text_domains, $default_domain, $LC_CATEGORIES, $EMULATEGETTEXT, $CURRENTLOCALE; +$text_domains = array(); +$default_domain = 'messages'; +$LC_CATEGORIES = array('LC_CTYPE', 'LC_NUMERIC', 'LC_TIME', 'LC_COLLATE', 'LC_MONETARY', 'LC_MESSAGES', 'LC_ALL'); +$EMULATEGETTEXT = 0; +$CURRENTLOCALE = ''; + + +// Utility functions + +/** + * Utility function to get a StreamReader for the given text domain. + */ +function _get_reader($domain=null, $category=5, $enable_cache=true) { + global $text_domains, $default_domain, $LC_CATEGORIES; + if (!isset($domain)) $domain = $default_domain; + if (!isset($text_domains[$domain]->l10n)) { + // get the current locale + $locale = _setlocale(LC_MESSAGES, 0); + $p = isset($text_domains[$domain]->path) ? $text_domains[$domain]->path : './'; + $path = $p . "$locale/". $LC_CATEGORIES[$category] ."/$domain.mo"; + if (file_exists($path)) { + $input = new FileReader($path); + } + else { + $input = null; + } + $text_domains[$domain]->l10n = new gettext_reader($input, $enable_cache); + } + return $text_domains[$domain]->l10n; +} + +/** + * Returns whether we are using our emulated gettext API or PHP built-in one. + */ +function locale_emulation() { + global $EMULATEGETTEXT; + return $EMULATEGETTEXT; +} + +/** + * Checks if the current locale is supported on this system. + */ +function _check_locale() { + global $EMULATEGETTEXT; + return !$EMULATEGETTEXT; +} + +/** + * Get the codeset for the given domain. + */ +function _get_codeset($domain=null) { + global $text_domains, $default_domain, $LC_CATEGORIES; + if (!isset($domain)) $domain = $default_domain; + return (isset($text_domains[$domain]->codeset))? $text_domains[$domain]->codeset : ini_get('mbstring.internal_encoding'); +} + +/** + * Convert the given string to the encoding set by bind_textdomain_codeset. + */ +function _encode($text) { + $source_encoding = mb_detect_encoding($text); + $target_encoding = _get_codeset(); + if ($source_encoding != $target_encoding) { + return mb_convert_encoding($text, $target_encoding, $source_encoding); + } + else { + return $text; + } +} + + + + +// Custom implementation of the standard gettext related functions + +/** + * Sets a requested locale, if needed emulates it. + */ +function _setlocale($category, $locale) { + global $CURRENTLOCALE, $EMULATEGETTEXT; + if ($locale === 0) { // use === to differentiate between string "0" + if ($CURRENTLOCALE != '') + return $CURRENTLOCALE; + else + // obey LANG variable, maybe extend to support all of LC_* vars + // even if we tried to read locale without setting it first + return _setlocale($category, $CURRENTLOCALE); + } else { + $ret = 0; + if (function_exists('setlocale')) // I don't know if this ever happens ;) + $ret = setlocale($category, $locale); + if (($ret and $locale == '') or ($ret == $locale)) { + $EMULATEGETTEXT = 0; + $CURRENTLOCALE = $ret; + } else { + if ($locale == '') // emulate variable support + $CURRENTLOCALE = getenv('LANG'); + else + $CURRENTLOCALE = $locale; + $EMULATEGETTEXT = 1; + } + return $CURRENTLOCALE; + } +} + +/** + * Sets the path for a domain. + */ +function _bindtextdomain($domain, $path) { + global $text_domains; + // ensure $path ends with a slash + if ($path[strlen($path) - 1] != '/') $path .= '/'; + elseif ($path[strlen($path) - 1] != '\\') $path .= '\\'; + $text_domains[$domain]->path = $path; +} + +/** + * Specify the character encoding in which the messages from the DOMAIN message catalog will be returned. + */ +function _bind_textdomain_codeset($domain, $codeset) { + global $text_domains; + $text_domains[$domain]->codeset = $codeset; +} + +/** + * Sets the default domain. + */ +function _textdomain($domain) { + global $default_domain; + $default_domain = $domain; +} + +/** + * Lookup a message in the current domain. + */ +function _gettext($msgid) { + $l10n = _get_reader(); + //return $l10n->translate($msgid); + return _encode($l10n->translate($msgid)); +} +/** + * Alias for gettext. + */ +function __($msgid) { + return _gettext($msgid); +} +/** + * Plural version of gettext. + */ +function _ngettext($single, $plural, $number) { + $l10n = _get_reader(); + //return $l10n->ngettext($single, $plural, $number); + return _encode($l10n->ngettext($single, $plural, $number)); +} + +/** + * Override the current domain. + */ +function _dgettext($domain, $msgid) { + $l10n = _get_reader($domain); + //return $l10n->translate($msgid); + return _encode($l10n->translate($msgid)); +} +/** + * Plural version of dgettext. + */ +function _dngettext($domain, $single, $plural, $number) { + $l10n = _get_reader($domain); + //return $l10n->ngettext($single, $plural, $number); + return _encode($l10n->ngettext($single, $plural, $number)); +} + +/** + * Overrides the domain and category for a single lookup. + */ +function _dcgettext($domain, $msgid, $category) { + $l10n = _get_reader($domain, $category); + //return $l10n->translate($msgid); + return _encode($l10n->translate($msgid)); +} +/** + * Plural version of dcgettext. + */ +function _dcngettext($domain, $single, $plural, $number, $category) { + $l10n = _get_reader($domain, $category); + //return $l10n->ngettext($single, $plural, $number); + return _encode($l10n->ngettext($single, $plural, $number)); +} + + + +// Wrappers to use if the standard gettext functions are available, but the current locale is not supported by the system. +// Use the standard impl if the current locale is supported, use the custom impl otherwise. + +function T_setlocale($category, $locale) { + return _setlocale($category, $locale); +} + +function T_bindtextdomain($domain, $path) { + if (_check_locale()) return bindtextdomain($domain, $path); + else return _bindtextdomain($domain, $path); +} +function T_bind_textdomain_codeset($domain, $codeset) { + // bind_textdomain_codeset is available only in PHP 4.2.0+ + if (_check_locale() and function_exists('bind_textdomain_codeset')) return bind_textdomain_codeset($domain, $codeset); + else return _bind_textdomain_codeset($domain, $codeset); +} +function T_textdomain($domain) { + if (_check_locale()) return textdomain($domain); + else return _textdomain($domain); +} +function T_gettext($msgid) { + if (_check_locale()) return gettext($msgid); + else return _gettext($msgid); +} +function T_($msgid) { + if (_check_locale()) return _($msgid); + return __($msgid); +} +function T_ngettext($single, $plural, $number) { + if (_check_locale()) return ngettext($single, $plural, $number); + else return _ngettext($single, $plural, $number); +} +function T_dgettext($domain, $msgid) { + if (_check_locale()) return dgettext($domain, $msgid); + else return _dgettext($domain, $msgid); +} +function T_dngettext($domain, $single, $plural, $number) { + if (_check_locale()) return dngettext($domain, $single, $plural, $number); + else return _dngettext($domain, $single, $plural, $number); +} +function T_dcgettext($domain, $msgid, $category) { + if (_check_locale()) return dcgettext($domain, $msgid, $category); + else return _dcgettext($domain, $msgid, $category); +} +function T_dcngettext($domain, $single, $plural, $number, $category) { + if (_check_locale()) return dcngettext($domain, $single, $plural, $number, $category); + else return _dcngettext($domain, $single, $plural, $number, $category); +} + + + +// Wrappers used as a drop in replacement for the standard gettext functions + +if (!function_exists('gettext')) { + function bindtextdomain($domain, $path) { + return _bindtextdomain($domain, $path); + } + function bind_textdomain_codeset($domain, $codeset) { + return _bind_textdomain_codeset($domain, $codeset); + } + function textdomain($domain) { + return _textdomain($domain); + } + function gettext($msgid) { + return _gettext($msgid); + } + function _($msgid) { + return __($msgid); + } + function ngettext($single, $plural, $number) { + return _ngettext($single, $plural, $number); + } + function dgettext($domain, $msgid) { + return _dgettext($domain, $msgid); + } + function dngettext($domain, $single, $plural, $number) { + return _dngettext($domain, $single, $plural, $number); + } + function dcgettext($domain, $msgid, $category) { + return _dcgettext($domain, $msgid, $category); + } + function dcngettext($domain, $single, $plural, $number, $category) { + return _dcngettext($domain, $single, $plural, $number, $category); + } +} + +?> \ No newline at end of file diff --git a/gettext/gettext.php b/gettext/gettext.php new file mode 100644 index 000000000..ad94a987b --- /dev/null +++ b/gettext/gettext.php @@ -0,0 +1,358 @@ +. + Copyright (c) 2005 Nico Kaiser + + This file is part of PHP-gettext. + + PHP-gettext is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + PHP-gettext is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PHP-gettext; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ + +/** + * Provides a simple gettext replacement that works independently from + * the system's gettext abilities. + * It can read MO files and use them for translating strings. + * The files are passed to gettext_reader as a Stream (see streams.php) + * + * This version has the ability to cache all strings and translations to + * speed up the string lookup. + * While the cache is enabled by default, it can be switched off with the + * second parameter in the constructor (e.g. whenusing very large MO files + * that you don't want to keep in memory) + */ +class gettext_reader { + //public: + var $error = 0; // public variable that holds error code (0 if no error) + + //private: + var $BYTEORDER = 0; // 0: low endian, 1: big endian + var $STREAM = NULL; + var $short_circuit = false; + var $enable_cache = false; + var $originals = NULL; // offset of original table + var $translations = NULL; // offset of translation table + var $pluralheader = NULL; // cache header field for plural forms + var $total = 0; // total string count + var $table_originals = NULL; // table for original strings (offsets) + var $table_translations = NULL; // table for translated strings (offsets) + var $cache_translations = NULL; // original -> translation mapping + + + /* Methods */ + + + /** + * Reads a 32bit Integer from the Stream + * + * @access private + * @return Integer from the Stream + */ + function readint() { + if ($this->BYTEORDER == 0) { + // low endian + return array_shift(unpack('V', $this->STREAM->read(4))); + } else { + // big endian + return array_shift(unpack('N', $this->STREAM->read(4))); + } + } + + /** + * Reads an array of Integers from the Stream + * + * @param int count How many elements should be read + * @return Array of Integers + */ + function readintarray($count) { + if ($this->BYTEORDER == 0) { + // low endian + return unpack('V'.$count, $this->STREAM->read(4 * $count)); + } else { + // big endian + return unpack('N'.$count, $this->STREAM->read(4 * $count)); + } + } + + /** + * Constructor + * + * @param object Reader the StreamReader object + * @param boolean enable_cache Enable or disable caching of strings (default on) + */ + function gettext_reader($Reader, $enable_cache = true) { + // If there isn't a StreamReader, turn on short circuit mode. + if (! $Reader || isset($Reader->error) ) { + $this->short_circuit = true; + return; + } + + // Caching can be turned off + $this->enable_cache = $enable_cache; + + // $MAGIC1 = (int)0x950412de; //bug in PHP 5 + $MAGIC1 = (int) - 1794895138; + // $MAGIC2 = (int)0xde120495; //bug + $MAGIC2 = (int) - 569244523; + + $this->STREAM = $Reader; + $magic = $this->readint(); + if ($magic == $MAGIC1) { + $this->BYTEORDER = 0; + } elseif ($magic == $MAGIC2) { + $this->BYTEORDER = 1; + } else { + $this->error = 1; // not MO file + return false; + } + + // FIXME: Do we care about revision? We should. + $revision = $this->readint(); + + $this->total = $this->readint(); + $this->originals = $this->readint(); + $this->translations = $this->readint(); + } + + /** + * Loads the translation tables from the MO file into the cache + * If caching is enabled, also loads all strings into a cache + * to speed up translation lookups + * + * @access private + */ + function load_tables() { + if (is_array($this->cache_translations) && + is_array($this->table_originals) && + is_array($this->table_translations)) + return; + + /* get original and translations tables */ + $this->STREAM->seekto($this->originals); + $this->table_originals = $this->readintarray($this->total * 2); + $this->STREAM->seekto($this->translations); + $this->table_translations = $this->readintarray($this->total * 2); + + if ($this->enable_cache) { + $this->cache_translations = array (); + /* read all strings in the cache */ + for ($i = 0; $i < $this->total; $i++) { + $this->STREAM->seekto($this->table_originals[$i * 2 + 2]); + $original = $this->STREAM->read($this->table_originals[$i * 2 + 1]); + $this->STREAM->seekto($this->table_translations[$i * 2 + 2]); + $translation = $this->STREAM->read($this->table_translations[$i * 2 + 1]); + $this->cache_translations[$original] = $translation; + } + } + } + + /** + * Returns a string from the "originals" table + * + * @access private + * @param int num Offset number of original string + * @return string Requested string if found, otherwise '' + */ + function get_original_string($num) { + $length = $this->table_originals[$num * 2 + 1]; + $offset = $this->table_originals[$num * 2 + 2]; + if (! $length) + return ''; + $this->STREAM->seekto($offset); + $data = $this->STREAM->read($length); + return (string)$data; + } + + /** + * Returns a string from the "translations" table + * + * @access private + * @param int num Offset number of original string + * @return string Requested string if found, otherwise '' + */ + function get_translation_string($num) { + $length = $this->table_translations[$num * 2 + 1]; + $offset = $this->table_translations[$num * 2 + 2]; + if (! $length) + return ''; + $this->STREAM->seekto($offset); + $data = $this->STREAM->read($length); + return (string)$data; + } + + /** + * Binary search for string + * + * @access private + * @param string string + * @param int start (internally used in recursive function) + * @param int end (internally used in recursive function) + * @return int string number (offset in originals table) + */ + function find_string($string, $start = -1, $end = -1) { + if (($start == -1) or ($end == -1)) { + // find_string is called with only one parameter, set start end end + $start = 0; + $end = $this->total; + } + if (abs($start - $end) <= 1) { + // We're done, now we either found the string, or it doesn't exist + $txt = $this->get_original_string($start); + if ($string == $txt) + return $start; + else + return -1; + } else if ($start > $end) { + // start > end -> turn around and start over + return $this->find_string($string, $end, $start); + } else { + // Divide table in two parts + $half = (int)(($start + $end) / 2); + $cmp = strcmp($string, $this->get_original_string($half)); + if ($cmp == 0) + // string is exactly in the middle => return it + return $half; + else if ($cmp < 0) + // The string is in the upper half + return $this->find_string($string, $start, $half); + else + // The string is in the lower half + return $this->find_string($string, $half, $end); + } + } + + /** + * Translates a string + * + * @access public + * @param string string to be translated + * @return string translated string (or original, if not found) + */ + function translate($string) { + if ($this->short_circuit) + return $string; + $this->load_tables(); + + if ($this->enable_cache) { + // Caching enabled, get translated string from cache + if (array_key_exists($string, $this->cache_translations)) + return $this->cache_translations[$string]; + else + return $string; + } else { + // Caching not enabled, try to find string + $num = $this->find_string($string); + if ($num == -1) + return $string; + else + return $this->get_translation_string($num); + } + } + + /** + * Get possible plural forms from MO header + * + * @access private + * @return string plural form header + */ + function get_plural_forms() { + // lets assume message number 0 is header + // this is true, right? + $this->load_tables(); + + // cache header field for plural forms + if (! is_string($this->pluralheader)) { + if ($this->enable_cache) { + $header = $this->cache_translations[""]; + } else { + $header = $this->get_translation_string(0); + } + if (eregi("plural-forms: ([^\n]*)\n", $header, $regs)) + $expr = $regs[1]; + else + $expr = "nplurals=2; plural=n == 1 ? 0 : 1;"; + $this->pluralheader = $expr; + } + return $this->pluralheader; + } + + /** + * Detects which plural form to take + * + * @access private + * @param n count + * @return int array index of the right plural form + */ + function select_string($n) { + $string = $this->get_plural_forms(); + $string = str_replace('nplurals',"\$total",$string); + $string = str_replace("n",$n,$string); + $string = str_replace('plural',"\$plural",$string); + + $total = 0; + $plural = 0; + + eval("$string"); + if ($plural >= $total) $plural = $total - 1; + return $plural; + } + + /** + * Plural version of gettext + * + * @access public + * @param string single + * @param string plural + * @param string number + * @return translated plural form + */ + function ngettext($single, $plural, $number) { + if ($this->short_circuit) { + if ($number != 1) + return $plural; + else + return $single; + } + + // find out the appropriate form + $select = $this->select_string($number); + + // this should contains all strings separated by NULLs + $key = $single.chr(0).$plural; + + + if ($this->enable_cache) { + if (! array_key_exists($key, $this->cache_translations)) { + return ($number != 1) ? $plural : $single; + } else { + $result = $this->cache_translations[$key]; + $list = explode(chr(0), $result); + return $list[$select]; + } + } else { + $num = $this->find_string($key); + if ($num == -1) { + return ($number != 1) ? $plural : $single; + } else { + $result = $this->get_translation_string($num); + $list = explode(chr(0), $result); + return $list[$select]; + } + } + } + +} + +?> diff --git a/gettext/streams.php b/gettext/streams.php new file mode 100644 index 000000000..d57aac649 --- /dev/null +++ b/gettext/streams.php @@ -0,0 +1,166 @@ +. + + This file is part of PHP-gettext. + + PHP-gettext is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + PHP-gettext is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PHP-gettext; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ + + +// Simple class to wrap file streams, string streams, etc. +// seek is essential, and it should be byte stream +class StreamReader { + // should return a string [FIXME: perhaps return array of bytes?] + function read($bytes) { + return false; + } + + // should return new position + function seekto($position) { + return false; + } + + // returns current position + function currentpos() { + return false; + } + + // returns length of entire stream (limit for seekto()s) + function length() { + return false; + } +} + +class StringReader { + var $_pos; + var $_str; + + function StringReader($str='') { + $this->_str = $str; + $this->_pos = 0; + } + + function read($bytes) { + $data = substr($this->_str, $this->_pos, $bytes); + $this->_pos += $bytes; + if (strlen($this->_str)<$this->_pos) + $this->_pos = strlen($this->_str); + + return $data; + } + + function seekto($pos) { + $this->_pos = $pos; + if (strlen($this->_str)<$this->_pos) + $this->_pos = strlen($this->_str); + return $this->_pos; + } + + function currentpos() { + return $this->_pos; + } + + function length() { + return strlen($this->_str); + } + +} + + +class FileReader { + var $_pos; + var $_fd; + var $_length; + + function FileReader($filename) { + if (file_exists($filename)) { + + $this->_length=filesize($filename); + $this->_pos = 0; + $this->_fd = fopen($filename,'rb'); + if (!$this->_fd) { + $this->error = 3; // Cannot read file, probably permissions + return false; + } + } else { + $this->error = 2; // File doesn't exist + return false; + } + } + + function read($bytes) { + if ($bytes) { + fseek($this->_fd, $this->_pos); + + // PHP 5.1.1 does not read more than 8192 bytes in one fread() + // the discussions at PHP Bugs suggest it's the intended behaviour + while ($bytes > 0) { + $chunk = fread($this->_fd, $bytes); + $data .= $chunk; + $bytes -= strlen($chunk); + } + $this->_pos = ftell($this->_fd); + + return $data; + } else return ''; + } + + function seekto($pos) { + fseek($this->_fd, $pos); + $this->_pos = ftell($this->_fd); + return $this->_pos; + } + + function currentpos() { + return $this->_pos; + } + + function length() { + return $this->_length; + } + + function close() { + fclose($this->_fd); + } + +} + +// Preloads entire file in memory first, then creates a StringReader +// over it (it assumes knowledge of StringReader internals) +class CachedFileReader extends StringReader { + function CachedFileReader($filename) { + if (file_exists($filename)) { + + $length=filesize($filename); + $fd = fopen($filename,'rb'); + + if (!$fd) { + $this->error = 3; // Cannot read file, probably permissions + return false; + } + $this->_str = fread($fd, $length); + fclose($fd); + + } else { + $this->error = 2; // File doesn't exist + return false; + } + } +} + + +?> \ No newline at end of file diff --git a/index.php b/index.php index 1078eedea..e73671ed4 100644 --- a/index.php +++ b/index.php @@ -1 +1 @@ - + diff --git a/prefs.php b/prefs.php index f9ddee82e..eca4e9468 100644 --- a/prefs.php +++ b/prefs.php @@ -2,6 +2,9 @@ error_reporting(E_ERROR | E_WARNING | E_PARSE); require_once "functions.php"; + + startup_gettext(); + require_once "sessions.php"; require_once "sanity_check.php"; require_once "version.php"; diff --git a/tt-rss.php b/tt-rss.php index fe7cc01ae..052a52523 100644 --- a/tt-rss.php +++ b/tt-rss.php @@ -2,6 +2,9 @@ error_reporting(E_ERROR | E_WARNING | E_PARSE); require_once "functions.php"; + + startup_gettext(); + require_once "sessions.php"; require_once "sanity_check.php"; require_once "version.php"; @@ -104,7 +107,7 @@ window.onload = init;