From f2d3cba2316a39e3d27e2e93e52562e72e7bd99d Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Fri, 12 Feb 2021 21:20:04 +0300 Subject: [PATCH] add HTTP_ACCEPT_LANGUAGE handling for php8 --- include/functions.php | 82 ++++++++++++----- include/sessions.php | 1 - lib/accept-to-gettext.php | 186 -------------------------------------- 3 files changed, 57 insertions(+), 212 deletions(-) delete mode 100644 lib/accept-to-gettext.php diff --git a/include/functions.php b/include/functions.php index f870f3382..6362adbbe 100644 --- a/include/functions.php +++ b/include/functions.php @@ -91,14 +91,8 @@ define('SUBSTRING_FOR_DATE', 'SUBSTRING'); } - /** - * Return available translations names. - * - * @access public - * @return array A array of available translations. - */ function get_translations() { - $tr = array( + $t = array( "auto" => __("Detect automatically"), "ar_SA" => "العربيّة (Arabic)", "bg_BG" => "Bulgarian", @@ -129,38 +123,76 @@ "fi_FI" => "Suomi", "tr_TR" => "Türkçe"); - return $tr; + return $t; } - require_once "lib/accept-to-gettext.php"; require_once "lib/gettext/gettext.inc.php"; function startup_gettext() { - # Get locale from Accept-Language header - if (version_compare(PHP_VERSION, '8.0.0', '<')) { - $lang = al2gt(array_keys(get_translations()), "text/html"); - } else { - $lang = ""; // FIXME: do something with accept-to-gettext.php - } + $selected_locale = ""; - if (defined('_TRANSLATION_OVERRIDE_DEFAULT')) { - $lang = _TRANSLATION_OVERRIDE_DEFAULT; - } + // https://www.codingwithjesse.com/blog/use-accept-language-header/ + if (!empty($_SERVER['HTTP_ACCEPT_LANGUAGE'])) { + $valid_langs = []; + $translations = array_keys(get_translations()); - if (!empty($_SESSION["uid"]) && get_schema_version() >= 120) { - $pref_lang = get_pref("USER_LANGUAGE", $_SESSION["uid"]); + array_shift($translations); // remove "auto" - if ($pref_lang && $pref_lang != 'auto') { - $lang = $pref_lang; + // full locale first + foreach ($translations as $t) { + $lang = strtolower(str_replace("_", "-", (string)$t)); + $valid_langs[$lang] = $t; + + $lang = substr($lang, 0, 2); + if (!isset($valid_langs[$lang])) + $valid_langs[$lang] = $t; + } + + // break up string into pieces (languages and q factors) + preg_match_all('/([a-z]{1,8}(-[a-z]{1,8})?)\s*(;\s*q\s*=\s*(1|0\.[0-9]+))?/i', + $_SERVER['HTTP_ACCEPT_LANGUAGE'], $lang_parse); + + if (count($lang_parse[1])) { + // create a list like "en" => 0.8 + $langs = array_combine($lang_parse[1], $lang_parse[4]); + + if (is_array($langs)) { + // set default to 1 for any without q factor + foreach ($langs as $lang => $val) { + if ($val === '') $langs[$lang] = 1; + } + + // sort list based on value + arsort($langs, SORT_NUMERIC); + + foreach (array_keys($langs) as $lang) { + $lang = strtolower($lang); + + foreach ($valid_langs as $vlang => $vlocale) { + if ($vlang == $lang) { + $selected_locale = $vlocale; + break 2; + } + } + } + } } } - if ($lang) { + if (!empty($_SESSION["uid"]) && get_schema_version() >= 120) { + $pref_locale = get_pref("USER_LANGUAGE", $_SESSION["uid"]); + + if (!empty($pref_locale) && $pref_locale != 'auto') { + $selected_locale = $pref_locale; + } + } + + if ($selected_locale) { if (defined('LC_MESSAGES')) { - _setlocale(LC_MESSAGES, $lang); + _setlocale(LC_MESSAGES, $selected_locale); } else if (defined('LC_ALL')) { - _setlocale(LC_ALL, $lang); + _setlocale(LC_ALL, $selected_locale); } _bindtextdomain("messages", "locale"); diff --git a/include/sessions.php b/include/sessions.php index d7dde782e..3119a4e07 100644 --- a/include/sessions.php +++ b/include/sessions.php @@ -5,7 +5,6 @@ require_once "classes/db.php"; require_once "autoload.php"; require_once "errorhandler.php"; - require_once "lib/accept-to-gettext.php"; require_once "lib/gettext/gettext.inc.php"; $session_expire = min(2147483647 - time() - 1, max(SESSION_COOKIE_LIFETIME, 86400)); diff --git a/lib/accept-to-gettext.php b/lib/accept-to-gettext.php deleted file mode 100644 index c86a62b2e..000000000 --- a/lib/accept-to-gettext.php +++ /dev/null @@ -1,186 +0,0 @@ - - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Usage: - * - * $locale=al2gt(, - * ); - * setlocale('LC_ALL', $locale); // or 'LC_MESSAGES', or whatever... - * - * Example: - * - * $langs=array('nl_BE.ISO-8859-15','nl_BE.UTF-8','en_US.UTF-8','en_GB.UTF-8'); - * $locale=al2gt($langs, 'text/html'); - * setlocale('LC_ALL', $locale); - * - * Note that this will send out header information (to be - * RFC2616-compliant), so it must be called before anything is sent to - * the user. - * - * Assumptions made: - * * Charset encodings are written the same way as the Accept-Charset - * HTTP header specifies them (RFC2616), except that they're parsed - * case-insensitive. - * * Country codes and language codes are the same in both gettext and - * the Accept-Language syntax (except for the case differences, which - * are dealt with easily). If not, some input may be ignored. - * * The provided gettext-strings are fully qualified; i.e., no "en_US"; - * always "en_US.ISO-8859-15" or "en_US.UTF-8", or whichever has been - * used. "en.ISO-8859-15" is OK, though. - * * The language is more important than the charset; i.e., if the - * following is given: - * - * Accept-Language: nl-be, nl;q=0.8, en-us;q=0.5, en;q=0.3 - * Accept-Charset: ISO-8859-15, utf-8;q=0.5 - * - * And the supplied parameter contains (amongst others) nl_BE.UTF-8 - * and nl.ISO-8859-15, then nl_BE.UTF-8 will be picked. - * - * $Log: accept-to-gettext.inc,v $ - * Revision 1.1.1.1 2003/11/19 19:31:15 wouter - * * moved to new CVS repo after death of the old - * * Fixed code to apply a default to both Accept-Charset and - * Accept-Language if none of those headers are supplied; patch from - * Dominic Chambers - * - * Revision 1.2 2003/08/14 10:23:59 wouter - * Removed little error in Content-Type header syntaxis. - * - * 2007-04-01 - * add '@' before use of arrays, to avoid PHP warnings. - */ - -/* not really important, this one; perhaps I could've put it inline with - * the rest. */ -function find_match($curlscore,$curcscore,$curgtlang,$langval,$charval, - $gtlang) -{ - if($curlscore < $langval) { - $curlscore=$langval; - $curcscore=$charval; - $curgtlang=$gtlang; - } else if ($curlscore == $langval) { - if($curcscore < $charval) { - $curcscore=$charval; - $curgtlang=$gtlang; - } - } - return array($curlscore, $curcscore, $curgtlang); -} - -function al2gt($gettextlangs, $mime) { - /* default to "everything is acceptable", as RFC2616 specifies */ - $acceptLang=(($_SERVER["HTTP_ACCEPT_LANGUAGE"] == '') ? '*' : - $_SERVER["HTTP_ACCEPT_LANGUAGE"]); - $acceptChar=(($_SERVER["HTTP_ACCEPT_CHARSET"] == '') ? '*' : - $_SERVER["HTTP_ACCEPT_CHARSET"]); - $alparts=@preg_split("/,/",$acceptLang); - $acparts=@preg_split("/,/",$acceptChar); - - /* Parse the contents of the Accept-Language header.*/ - foreach($alparts as $part) { - $part=trim($part); - if(preg_match("/;/", $part)) { - $lang=@preg_split("/;/",$part); - $score=@preg_split("/=/",$lang[1]); - $alscores[$lang[0]]=$score[1]; - } else { - $alscores[$part]=1; - } - } - - /* Do the same for the Accept-Charset header. */ - - /* RFC2616: ``If no "*" is present in an Accept-Charset field, then - * all character sets not explicitly mentioned get a quality value of - * 0, except for ISO-8859-1, which gets a quality value of 1 if not - * explicitly mentioned.'' - * - * Making it 2 for the time being, so that we - * can distinguish between "not specified" and "specified as 1" later - * on. */ - $acscores["ISO-8859-1"]=2; - - foreach($acparts as $part) { - $part=trim($part); - if(preg_match("/;/", $part)) { - $cs=@preg_split("/;/",$part); - $score=@preg_split("/=/",$cs[1]); - $acscores[strtoupper($cs[0])]=$score[1]; - } else { - $acscores[strtoupper($part)]=1; - } - } - if($acscores["ISO-8859-1"]==2) { - $acscores["ISO-8859-1"]=(isset($acscores["*"])?$acscores["*"]:1); - } - - /* - * Loop through the available languages/encodings, and pick the one - * with the highest score, excluding the ones with a charset the user - * did not include. - */ - $curlscore=0; - $curcscore=0; - $curgtlang=NULL; - foreach($gettextlangs as $gtlang) { - - $tmp1=preg_replace("/\_/","-",$gtlang); - $tmp2=@preg_split("/\./",$tmp1); - $allang=strtolower($tmp2[0]); - $gtcs=strtoupper($tmp2[1]); - $noct=@preg_split("/-/",$allang); - - $testvals=array( - array(@$alscores[$allang], @$acscores[$gtcs]), - array(@$alscores[$noct[0]], @$acscores[$gtcs]), - array(@$alscores[$allang], @$acscores["*"]), - array(@$alscores[$noct[0]], @$acscores["*"]), - array(@$alscores["*"], @$acscores[$gtcs]), - array(@$alscores["*"], @$acscores["*"])); - - $found=FALSE; - foreach($testvals as $tval) { - if(!$found && isset($tval[0]) && isset($tval[1])) { - $arr=find_match($curlscore, $curcscore, $curgtlang, $tval[0], - $tval[1], $gtlang); - $curlscore=$arr[0]; - $curcscore=$arr[1]; - $curgtlang=$arr[2]; - $found=TRUE; - } - } - } - - /* We must re-parse the gettext-string now, since we may have found it - * through a "*" qualifier.*/ - - $gtparts=@preg_split("/\./",$curgtlang); - $tmp=strtolower($gtparts[0]); - $lang=preg_replace("/\_/", "-", $tmp); - $charset=$gtparts[1]; - - header("Content-Language: $lang"); - header("Content-Type: $mime; charset=$charset"); - - return $curgtlang; -} - -?>