<?php /* * accept-to-gettext.inc -- convert information in 'Accept-*' headers to * gettext language identifiers. * Copyright (c) 2003, Wouter Verhelst <wouter@debian.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Usage: * * $locale=al2gt(<array of supported languages/charsets in gettext syntax>, * <MIME type of document>); * setlocale('LC_ALL', $locale); // or 'LC_MESSAGES', or whatever... * * Example: * * $langs=array('nl_BE.ISO-8859-15','nl_BE.UTF-8','en_US.UTF-8','en_GB.UTF-8'); * $locale=al2gt($langs, 'text/html'); * setlocale('LC_ALL', $locale); * * Note that this will send out header information (to be * RFC2616-compliant), so it must be called before anything is sent to * the user. * * Assumptions made: * * Charset encodings are written the same way as the Accept-Charset * HTTP header specifies them (RFC2616), except that they're parsed * case-insensitive. * * Country codes and language codes are the same in both gettext and * the Accept-Language syntax (except for the case differences, which * are dealt with easily). If not, some input may be ignored. * * The provided gettext-strings are fully qualified; i.e., no "en_US"; * always "en_US.ISO-8859-15" or "en_US.UTF-8", or whichever has been * used. "en.ISO-8859-15" is OK, though. * * The language is more important than the charset; i.e., if the * following is given: * * Accept-Language: nl-be, nl;q=0.8, en-us;q=0.5, en;q=0.3 * Accept-Charset: ISO-8859-15, utf-8;q=0.5 * * And the supplied parameter contains (amongst others) nl_BE.UTF-8 * and nl.ISO-8859-15, then nl_BE.UTF-8 will be picked. * * $Log: accept-to-gettext.inc,v $ * Revision 1.1.1.1 2003/11/19 19:31:15 wouter * * moved to new CVS repo after death of the old * * Fixed code to apply a default to both Accept-Charset and * Accept-Language if none of those headers are supplied; patch from * Dominic Chambers <dominic@encasa.com> * * Revision 1.2 2003/08/14 10:23:59 wouter * Removed little error in Content-Type header syntaxis. * */ /* not really important, this one; perhaps I could've put it inline with * the rest. */ function find_match($curlscore,$curcscore,$curgtlang,$langval,$charval, $gtlang) { if($curlscore < $langval) { $curlscore=$langval; $curcscore=$charval; $curgtlang=$gtlang; } else if ($curlscore == $langval) { if($curcscore < $charval) { $curcscore=$charval; $curgtlang=$gtlang; } } return array($curlscore, $curcscore, $curgtlang); } function al2gt($gettextlangs, $mime) { /* default to "everything is acceptable", as RFC2616 specifies */ $acceptLang=(($_SERVER["HTTP_ACCEPT_LANGUAGE"] == '') ? '*' : $_SERVER["HTTP_ACCEPT_LANGUAGE"]); $acceptChar=(($_SERVER["HTTP_ACCEPT_CHARSET"] == '') ? '*' : $_SERVER["HTTP_ACCEPT_CHARSET"]); $alparts=@preg_split("/,/",$acceptLang); $acparts=@preg_split("/,/",$acceptChar); /* Parse the contents of the Accept-Language header.*/ foreach($alparts as $part) { $part=trim($part); if(preg_match("/;/", $part)) { $lang=@preg_split("/;/",$part); $score=@preg_split("/=/",$lang[1]); $alscores[$lang[0]]=$score[1]; } else { $alscores[$part]=1; } } /* Do the same for the Accept-Charset header. */ /* RFC2616: ``If no "*" is present in an Accept-Charset field, then * all character sets not explicitly mentioned get a quality value of * 0, except for ISO-8859-1, which gets a quality value of 1 if not * explicitly mentioned.'' * * Making it 2 for the time being, so that we * can distinguish between "not specified" and "specified as 1" later * on. */ $acscores["ISO-8859-1"]=2; foreach($acparts as $part) { $part=trim($part); if(preg_match("/;/", $part)) { $cs=@preg_split("/;/",$part); $score=@preg_split("/=/",$cs[1]); $acscores[strtoupper($cs[0])]=$score[1]; } else { $acscores[strtoupper($part)]=1; } } if($acscores["ISO-8859-1"]==2) { $acscores["ISO-8859-1"]=(isset($acscores["*"])?$acscores["*"]:1); } /* * Loop through the available languages/encodings, and pick the one * with the highest score, excluding the ones with a charset the user * did not include. */ $curlscore=0; $curcscore=0; $curgtlang=NULL; foreach($gettextlangs as $gtlang) { $tmp1=preg_replace("/\_/","-",$gtlang); $tmp2=@preg_split("/\./",$tmp1); $allang=strtolower($tmp2[0]); $gtcs=strtoupper($tmp2[1]); $noct=@preg_split("/-/",$allang); $testvals=array( array($alscores[$allang], $acscores[$gtcs]), array($alscores[$noct[0]], $acscores[$gtcs]), array($alscores[$allang], $acscores["*"]), array($alscores[$noct[0]], $acscores["*"]), array($alscores["*"], $acscores[$gtcs]), array($alscores["*"], $acscores["*"])); $found=FALSE; foreach($testvals as $tval) { if(!$found && isset($tval[0]) && isset($tval[1])) { $arr=find_match($curlscore, $curcscore, $curgtlang, $tval[0], $tval[1], $gtlang); $curlscore=$arr[0]; $curcscore=$arr[1]; $curgtlang=$arr[2]; $found=TRUE; } } } /* We must re-parse the gettext-string now, since we may have found it * through a "*" qualifier.*/ $gtparts=@preg_split("/\./",$curgtlang); $tmp=strtolower($gtparts[0]); $lang=preg_replace("/\_/", "-", $tmp); $charset=$gtparts[1]; header("Content-Language: $lang"); header("Content-Type: $mime; charset=$charset"); return $curgtlang; } ?>