add text_languagedetect to guess article language for better hyphenation

(bump schema)
2013-07-31 10:30:14 +04:00 · 2013-07-31 10:30:14 +04:00 · 6b4617970f
parent f035e6dc82
commit 6b4617970f
13 changed files with 2498 additions and 6 deletions
--- a/classes/feeds.php
+++ b/classes/feeds.php
@ -621,7 +621,7 @@ class Feeds extends Handler_Protected {
 					}
 					$reply['content'] .= "</div>";
-					$reply['content'] .= "<div class=\"cdmContentInner\" lang=\"en\">";
+					$reply['content'] .= "<div class=\"cdmContentInner\" lang=\"".$line['lang']."\">";
 			if ($line["orig_feed_id"]) {
--- a/include/functions.php
+++ b/include/functions.php
@ -1,6 +1,6 @@
 <?php
 	define('EXPECTED_CONFIG_VERSION', 26);
-	define('SCHEMA_VERSION', 121);
+	define('SCHEMA_VERSION', 122);
 	define('LABEL_BASE_INDEX', -1024);
 	define('PLUGIN_FEED_BASE_INDEX', -128);
@ -87,6 +87,7 @@
 	require_once "lib/accept-to-gettext.php";
 	require_once "lib/gettext/gettext.inc";
 	require_once "lib/languagedetect/LanguageDetect.php";
 	function startup_gettext() {
@ -2650,6 +2651,7 @@
 						comments,
 						int_id,
 						uuid,
 						lang,
 						hide_images,
 						unread,feed_id,marked,published,link,last_read,orig_feed_id,
 						last_marked, last_published,
@ -2692,6 +2694,7 @@
 								"tag_cache," .
 								"label_cache," .
 								"link," .
 								"lang," .
 								"uuid," .
 								"last_read," .
 								"(SELECT hide_images FROM ttrss_feeds WHERE id = feed_id) AS hide_images," .
@ -3118,7 +3121,7 @@
 			ccache_update($feed_id, $owner_uid);
 		}
-		$result = db_query("SELECT id,title,link,content,feed_id,comments,int_id,
+		$result = db_query("SELECT id,title,link,content,feed_id,comments,int_id,lang,
 			".SUBSTRING_FOR_DATE."(updated,1,16) as updated,
 			(SELECT site_url FROM ttrss_feeds WHERE id = feed_id) as site_url,
 			(SELECT hide_images FROM ttrss_feeds WHERE id = feed_id) as hide_images,
@ -3290,7 +3293,7 @@
 				}
 			$rv['content'] .= "</div>";
-			$rv['content'] .= "<div class=\"postContent\" lang=\"en\">";
+			$rv['content'] .= "<div class=\"postContent\" lang=\"".$line['lang']."\">";
 			$rv['content'] .= $line["content"];
 			$rv['content'] .= format_article_enclosures($id,
--- a/include/rssfuncs.php
+++ b/include/rssfuncs.php
@ -354,6 +354,11 @@
 			$rss->init();
 		}
 		require_once "lib/languagedetect/LanguageDetect.php";
 		$lang = new Text_LanguageDetect();
 		$lang->setNameMode(2);
 //		print_r($rss);
 		$feed = db_escape_string($feed);
@ -565,6 +570,15 @@
 					print "\n";
 				}
 				$entry_language = $lang->detect($entry_content, 1);
 				if (count($entry_language) > 0) {
 					$entry_language = array_keys($entry_language);
 					$entry_language = db_escape_string($entry_language[0]);
 					_debug("detected language: $entry_language", $debug_enabled);
 				}
 				$entry_comments = $item->get_comments_url();
 				$entry_author = $item->get_author();
@ -677,6 +691,7 @@
 							comments,
 							num_comments,
 							plugin_data,
 							lang,
 							author)
 						VALUES
 							('$entry_title',
@ -691,6 +706,7 @@
 							'$entry_comments',
 							'$num_comments',
 							'$entry_plugin_data',
 							'$entry_language',
 							'$entry_author')");
 					$article_labels = array();
--- a/lib/languagedetect/LanguageDetect.php
+++ b/lib/languagedetect/LanguageDetect.php
--- a/lib/languagedetect/Text/LanguageDetect/Exception.php
+++ b/lib/languagedetect/Text/LanguageDetect/Exception.php
@ -0,0 +1,57 @@
 <?php
 /**
  * Exception subclass that declares the numeric error codes listed below.
  *
  * The class adds no behaviour of its own. Codes are grouped by tens:
  * 10-14 language database file / runtime environment, 20-21 invalid
  * parameters, 30 unknown language, 40 block detection, 50 clustering.
  */
 class Text_LanguageDetect_Exception extends Exception
 {
    /** The database file could not be located. */
    const DB_NOT_FOUND = 10;

    /** The database file exists but cannot be read. */
    const DB_NOT_READABLE = 11;

    /** The database file has no content. */
    const DB_EMPTY = 12;

    /** The database content does not decode to a PHP array. */
    const DB_NOT_ARRAY = 13;

    /** Magic quotes are switched on. */
    const MAGIC_QUOTES = 14;

    /** A method received a parameter of the wrong type. */
    const PARAM_TYPE = 20;

    /** A parameter contains a character that is not allowed. */
    const INVALID_CHAR = 21;

    /** The requested language is missing from the database. */
    const UNKNOWN_LANGUAGE = 30;

    /** Something went wrong during block detection. */
    const BLOCK_DETECTION = 40;

    /** Something went wrong while clustering languages. */
    const NO_HIGHEST_KEY = 50;
 }
--- a/lib/languagedetect/Text/LanguageDetect/ISO639.php
+++ b/lib/languagedetect/Text/LanguageDetect/ISO639.php
@ -0,0 +1,341 @@
 <?php
 /**
 * Part of Text_LanguageDetect
 *
 * PHP version 5
 *
 * @category  Text
 * @package   Text_LanguageDetect
 * @author    Christian Weiske <cweiske@php.net>
 * @copyright 2011 Christian Weiske <cweiske@php.net>
 * @license   http://www.debian.org/misc/bsd.license BSD
 * @version   SVN: $Id$
 * @link      http://pear.php.net/package/Text_LanguageDetect/
 */
 /**
 * Provides a mapping between the languages from lang.dat and the
 * ISO 639-1 and ISO-639-2 codes.
 *
 * Note that this class contains only languages that exist in lang.dat.
 *
 * @category  Text
 * @package   Text_LanguageDetect
 * @author    Christian Weiske <cweiske@php.net>
 * @copyright 2011 Christian Weiske <cweiske@php.net>
 * @license   http://www.debian.org/misc/bsd.license BSD
 * @link      http://www.loc.gov/standards/iso639-2/php/code_list.php
 */
 class Text_LanguageDetect_ISO639
 {
    /**
     * Maps all language names from the language database to the
     * ISO 639-1 2-letter language code.
     *
     * NULL indicates that there is no 2-letter code.
     *
     * @var array
     */
    public static $nameToCode2 = array(
        'albanian'   => 'sq',
        'arabic'     => 'ar',
        'azeri'      => 'az',
        'bengali'    => 'bn',
        'bulgarian'  => 'bg',
        'cebuano'    => null,
        'croatian'   => 'hr',
        'czech'      => 'cs',
        'danish'     => 'da',
        'dutch'      => 'nl',
        'english'    => 'en',
        'estonian'   => 'et',
        'farsi'      => 'fa',
        'finnish'    => 'fi',
        'french'     => 'fr',
        'german'     => 'de',
        'hausa'      => 'ha',
        'hawaiian'   => null,
        'hindi'      => 'hi',
        'hungarian'  => 'hu',
        'icelandic'  => 'is',
        'indonesian' => 'id',
        'italian'    => 'it',
        'kazakh'     => 'kk',
        'kyrgyz'     => 'ky',
        'latin'      => 'la',
        'latvian'    => 'lv',
        'lithuanian' => 'lt',
        'macedonian' => 'mk',
        'mongolian'  => 'mn',
        'nepali'     => 'ne',
        'norwegian'  => 'no',
        'pashto'     => 'ps',
        'pidgin'     => null,
        'polish'     => 'pl',
        'portuguese' => 'pt',
        'romanian'   => 'ro',
        'russian'    => 'ru',
        'serbian'    => 'sr',
        'slovak'     => 'sk',
        'slovene'    => 'sl',
        'somali'     => 'so',
        'spanish'    => 'es',
        'swahili'    => 'sw',
        'swedish'    => 'sv',
        'tagalog'    => 'tl',
        'turkish'    => 'tr',
        'ukrainian'  => 'uk',
        'urdu'       => 'ur',
        'uzbek'      => 'uz',
        'vietnamese' => 'vi',
        'welsh'      => 'cy',
    );

    /**
     * Maps all language names from the language database to the
     * ISO 639-2 3-letter language code.
     *
     * @var array
     */
    public static $nameToCode3 = array(
        'albanian'   => 'sqi',
        'arabic'     => 'ara',
        'azeri'      => 'aze',
        'bengali'    => 'ben',
        'bulgarian'  => 'bul',
        'cebuano'    => 'ceb',
        'croatian'   => 'hrv',
        'czech'      => 'ces',
        'danish'     => 'dan',
        'dutch'      => 'nld',
        'english'    => 'eng',
        'estonian'   => 'est',
        'farsi'      => 'fas',
        'finnish'    => 'fin',
        'french'     => 'fra',
        'german'     => 'deu',
        'hausa'      => 'hau',
        'hawaiian'   => 'haw',
        'hindi'      => 'hin',
        'hungarian'  => 'hun',
        'icelandic'  => 'isl',
        'indonesian' => 'ind',
        'italian'    => 'ita',
        'kazakh'     => 'kaz',
        'kyrgyz'     => 'kir',
        'latin'      => 'lat',
        'latvian'    => 'lav',
        'lithuanian' => 'lit',
        'macedonian' => 'mkd',
        'mongolian'  => 'mon',
        'nepali'     => 'nep',
        'norwegian'  => 'nor',
        'pashto'     => 'pus',
        'pidgin'     => 'crp',
        'polish'     => 'pol',
        'portuguese' => 'por',
        'romanian'   => 'ron',
        'russian'    => 'rus',
        'serbian'    => 'srp',
        'slovak'     => 'slk',
        'slovene'    => 'slv',
        'somali'     => 'som',
        'spanish'    => 'spa',
        'swahili'    => 'swa',
        'swedish'    => 'swe',
        'tagalog'    => 'tgl',
        'turkish'    => 'tur',
        'ukrainian'  => 'ukr',
        'urdu'       => 'urd',
        'uzbek'      => 'uzb',
        'vietnamese' => 'vie',
        'welsh'      => 'cym',
    );

    /**
     * Maps ISO 639-1 2-letter language codes to the language names
     * in the language database
     *
     * Not all languages have a 2 letter code, so some are missing
     *
     * @var array
     */
    public static $code2ToName = array(
        'ar' => 'arabic',
        'az' => 'azeri',
        'bg' => 'bulgarian',
        'bn' => 'bengali',
        'cs' => 'czech',
        'cy' => 'welsh',
        'da' => 'danish',
        'de' => 'german',
        'en' => 'english',
        'es' => 'spanish',
        'et' => 'estonian',
        'fa' => 'farsi',
        'fi' => 'finnish',
        'fr' => 'french',
        'ha' => 'hausa',
        'hi' => 'hindi',
        'hr' => 'croatian',
        'hu' => 'hungarian',
        'id' => 'indonesian',
        'is' => 'icelandic',
        'it' => 'italian',
        'kk' => 'kazakh',
        'ky' => 'kyrgyz',
        'la' => 'latin',
        'lt' => 'lithuanian',
        'lv' => 'latvian',
        'mk' => 'macedonian',
        'mn' => 'mongolian',
        'ne' => 'nepali',
        'nl' => 'dutch',
        'no' => 'norwegian',
        'pl' => 'polish',
        'ps' => 'pashto',
        'pt' => 'portuguese',
        'ro' => 'romanian',
        'ru' => 'russian',
        'sk' => 'slovak',
        'sl' => 'slovene',
        'so' => 'somali',
        'sq' => 'albanian',
        'sr' => 'serbian',
        'sv' => 'swedish',
        'sw' => 'swahili',
        'tl' => 'tagalog',
        'tr' => 'turkish',
        'uk' => 'ukrainian',
        'ur' => 'urdu',
        'uz' => 'uzbek',
        'vi' => 'vietnamese',
    );

    /**
     * Maps ISO 639-2 3-letter language codes to the language names
     * in the language database.
     *
     * Every code here is the exact inverse of an entry in $nameToCode3,
     * so a name -> code3 -> name round trip yields the original name.
     *
     * @var array
     */
    public static $code3ToName = array(
        'ara' => 'arabic',
        'aze' => 'azeri',
        'ben' => 'bengali',
        'bul' => 'bulgarian',
        'ceb' => 'cebuano',
        'ces' => 'czech',
        'crp' => 'pidgin',
        'cym' => 'welsh',
        'dan' => 'danish',
        'deu' => 'german',
        'eng' => 'english',
        'est' => 'estonian',
        'fas' => 'farsi',
        'fin' => 'finnish',
        'fra' => 'french',
        'hau' => 'hausa',
        'haw' => 'hawaiian',
        'hin' => 'hindi',
        'hrv' => 'croatian',
        'hun' => 'hungarian',
        'ind' => 'indonesian',
        'isl' => 'icelandic',
        'ita' => 'italian',
        'kaz' => 'kazakh',
        'kir' => 'kyrgyz',
        'lat' => 'latin',
        'lav' => 'latvian',
        'lit' => 'lithuanian',
        'mkd' => 'macedonian',
        'mon' => 'mongolian',
        'nep' => 'nepali',
        'nld' => 'dutch',
        'nor' => 'norwegian',
        'pol' => 'polish',
        'por' => 'portuguese',
        'pus' => 'pashto',
        // Was 'rom', which is the ISO 639-2 code for Romany and did not
        // match the 'ron' used for Romanian in $nameToCode3.
        'ron' => 'romanian',
        'rus' => 'russian',
        'slk' => 'slovak',
        'slv' => 'slovene',
        'som' => 'somali',
        'spa' => 'spanish',
        'sqi' => 'albanian',
        'srp' => 'serbian',
        'swa' => 'swahili',
        'swe' => 'swedish',
        'tgl' => 'tagalog',
        'tur' => 'turkish',
        'ukr' => 'ukrainian',
        'urd' => 'urdu',
        'uzb' => 'uzbek',
        'vie' => 'vietnamese',
    );

    /**
     * Returns the 2-letter ISO 639-1 code for the given language name.
     *
     * The lookup is case-insensitive. Languages listed with a NULL code
     * (no 2-letter code exists) also yield NULL.
     *
     * @param string $lang English language name like "swedish"
     *
     * @return string|null Two-letter language code (e.g. "sv") or NULL if
     *                     not found
     */
    public static function nameToCode2($lang)
    {
        $lang = strtolower($lang);
        if (!isset(self::$nameToCode2[$lang])) {
            return null;
        }
        return self::$nameToCode2[$lang];
    }

    /**
     * Returns the 3-letter ISO 639-2 code for the given language name.
     *
     * The lookup is case-insensitive.
     *
     * @param string $lang English language name like "swedish"
     *
     * @return string|null Three-letter language code (e.g. "swe") or NULL
     *                     if not found
     */
    public static function nameToCode3($lang)
    {
        $lang = strtolower($lang);
        if (!isset(self::$nameToCode3[$lang])) {
            return null;
        }
        return self::$nameToCode3[$lang];
    }

    /**
     * Returns the language name for the given 2-letter ISO 639-1 code.
     *
     * The lookup is case-insensitive.
     *
     * @param string $code Two-letter language code (e.g. "sv")
     *
     * @return string|null English language name like "swedish" or NULL if
     *                     the code is not in the table
     */
    public static function code2ToName($code)
    {
        // Normalise the code itself: the table keys are lower case. The
        // lowered value used to be stored in an unused variable, so any
        // upper-case input was silently reported as unknown.
        $code = strtolower($code);
        if (!isset(self::$code2ToName[$code])) {
            return null;
        }
        return self::$code2ToName[$code];
    }

    /**
     * Returns the language name for the given 3-letter ISO 639-2 code.
     *
     * The lookup is case-insensitive.
     *
     * @param string $code Three-letter language code (e.g. "swe")
     *
     * @return string|null English language name like "swedish" or NULL if
     *                     the code is not in the table
     */
    public static function code3ToName($code)
    {
        // Same normalisation as code2ToName(): look up the lowered code.
        $code = strtolower($code);
        if (!isset(self::$code3ToName[$code])) {
            return null;
        }
        return self::$code3ToName[$code];
    }
 }
 ?>
--- a/lib/languagedetect/Text/LanguageDetect/Parser.php
+++ b/lib/languagedetect/Text/LanguageDetect/Parser.php
@ -0,0 +1,349 @@
 <?php
 /**
 * This class represents a text sample to be parsed.
 *
 * @category    Text
 * @package     Text_LanguageDetect
 * @author      Nicholas Pisarro
 * @copyright   2006
 * @license     BSD
 * @version     CVS: $Id: Parser.php 322327 2012-01-15 17:55:59Z cweiske $
 * @link        http://pear.php.net/package/Text_LanguageDetect/
 * @link        http://langdetect.blogspot.com/
 */
 /**
 * This class represents a text sample to be parsed.
 *
 * This separates the analysis of a text sample from the primary LanguageDetect
 * class. After a new profile has been built, the data can be retrieved using
 * the accessor functions.
 *
 * This class is intended to be used by the Text_LanguageDetect class, not 
 * end-users.
 *
 * @category    Text
 * @package     Text_LanguageDetect
 * @author      Nicholas Pisarro
 * @copyright   2006
 * @license     BSD
 * @version     release: 0.3.0
 */
 class Text_LanguageDetect_Parser extends Text_LanguageDetect
 {
    /**
     * the piece of text being parsed
     *
     * @access  private
     * @var     string
     */
    var $_string;

    /**
     * NOTE(review): nothing in this class reads or writes this property;
     * the trigram counts actually live in $_trigram below. It is kept only
     * so that any outside code touching it keeps working.
     *
     * @access  private
     * @var     array
     */
    var $_trigrams = array();

    /**
     * stores the trigram frequencies of the sample
     *
     * Keys are the three-character strings built in analyze(), values are
     * their occurrence counts. Declared explicitly so that analyze() and
     * getTrigramFreqs() no longer rely on a dynamically created property.
     *
     * @access  private
     * @var     array
     */
    var $_trigram = array();

    /**
     * stores the trigram ranks of the sample
     *
     * @access  private
     * @var     array
     */
    var $_trigram_ranks = array();

    /**
     * stores the unicode blocks of the sample
     *
     * @access  private
     * @var     array
     */
    var $_unicode_blocks = array();

    /**
     * Whether the parser should compile the unicode ranges
     *
     * @access  private
     * @var     bool
     */
    var $_compile_unicode = false;

    /**
     * Whether the parser should compile trigrams
     *
     * @access  private
     * @var     bool
     */
    var $_compile_trigram = false;

    /**
     * Whether the trigram parser should pad the beginning of the string
     *
     * @access  private
     * @var     bool
     */
    var $_trigram_pad_start = false;

    /**
     * Whether the unicode parser should skip non-alphabetical ascii chars
     *
     * @access  private
     * @var     bool
     */
    var $_unicode_skip_symbols = true;

    /**
     * Constructor
     *
     * Declared as __construct() because a method named after the class is
     * no longer treated as a constructor by PHP 8. Like the former
     * class-named constructor, it only stores the sample and does not
     * invoke any parent constructor.
     *
     * @access  private
     * @param   string  $string     string to be parsed
     */
    function __construct($string)
    {
        $this->_string = $string;
    }

    /**
     * Returns true if a string is suitable for parsing
     *
     * A string qualifies when it is non-empty, longer than 3 bytes and
     * contains at least one non-whitespace character.
     *
     * @param   string  $str    input string to test
     * @return  bool            true if acceptable, false if not
     */
    public static function validateString($str)
    {
        if (!empty($str) && strlen($str) > 3 && preg_match('/\S/', $str)) {
            return true;
        } else {
            return false;
        }
    }

    /**
     * turn on/off trigram counting
     *
     * @access  public
     * @param   bool    $bool true for on, false for off
     */
    function prepareTrigram($bool = true)
    {
        $this->_compile_trigram = $bool;
    }

    /**
     * turn on/off unicode block counting
     *
     * @access  public
     * @param   bool    $bool true for on, false for off
     */
    function prepareUnicode($bool = true)
    {
        $this->_compile_unicode = $bool;
    }

    /**
     * turn on/off padding the beginning of the sample string
     *
     * @access  public
     * @param   bool    $bool true for on, false for off
     */
    function setPadStart($bool = true)
    {
        $this->_trigram_pad_start = $bool;
    }

    /**
     * Should the unicode block counter skip non-alphabetical ascii chars?
     *
     * @access  public
     * @param   bool    $bool true for on, false for off
     */
    function setUnicodeSkipSymbols($bool = true)
    {
        $this->_unicode_skip_symbols = $bool;
    }

    /**
     * Returns the trigram ranks for the text sample
     *
     * @access  public
     * @return  array    trigram ranks in the text sample
     */
    function &getTrigramRanks()
    {
        return $this->_trigram_ranks;
    }

    /**
     * Return the trigram frequency table
     *
     * only used in testing to make sure the parser is working
     *
     * @access  public
     * @return  array    trigram frequencies in the text sample (an empty
     *                   array until analyze() has counted something)
     */
    function &getTrigramFreqs()
    {
        return $this->_trigram;
    }

    /**
     * returns the array of unicode blocks
     *
     * @access  public
     * @return  array   unicode blocks in the text sample
     */
    function &getUnicodeBlocks()
    {
        return $this->_unicode_blocks;
    }

    /**
     * Executes the parsing operation
     *
     * Be sure to call the set*() functions to set options and the
     * prepare*() functions first to tell it what kind of data to compute
     *
     * Afterwards the get*() functions can be used to access the compiled
     * information.
     *
     * @access public
     */
    function analyze()
    {
        $len = strlen($this->_string);
        $byte_counter = 0;

        // unicode startup
        if ($this->_compile_unicode) {
            $blocks = $this->_read_unicode_block_db();
            $block_count = count($blocks);
            $unicode_chars = array();
        }

        // trigram startup
        if ($this->_compile_trigram) {
            // initialize them as blank so the parser will skip the first two
            // (since it skips trigrams with more than  2 contiguous spaces)
            $a = ' ';
            $b = ' ';

            // kludge
            // if it finds a valid trigram to start and the start pad option is
            // off, then set a variable that will be used to reduce this
            // trigram after parsing has finished
            if (!$this->_trigram_pad_start) {
                $a = $this->_next_char($this->_string, $byte_counter, true);
                if ($a != ' ') {
                    $b = $this->_next_char($this->_string, $byte_counter, true);
                    $dropone = " $a$b";
                }

                // rewind: the peek above must not consume any input
                $byte_counter = 0;
                $a = ' ';
                $b = ' ';
            }
        }

        while ($byte_counter < $len) {
            $char = $this->_next_char($this->_string, $byte_counter, true);

            // language trigram detection
            if ($this->_compile_trigram) {
                // skip windows whose middle char and one neighbour are spaces
                if (!($b == ' ' && ($a == ' ' || $char == ' '))) {
                    if (!isset($this->_trigram[$a . $b . $char])) {
                        $this->_trigram[$a . $b . $char] = 1;
                    } else {
                        $this->_trigram[$a . $b . $char]++;
                    }
                }

                // slide the three-character window forward by one
                $a = $b;
                $b = $char;
            }

            // unicode block detection
            if ($this->_compile_unicode) {
                if ($this->_unicode_skip_symbols
                        && strlen($char) == 1
                        && ($char < 'A' || $char > 'z'
                        || ($char > 'Z' && $char < 'a'))
                        && $char != "'") {  // does not skip the apostrophe
                                            // since it's included in the language
                                            // models
                    continue;
                }

                // build an array of all the characters
                if (isset($unicode_chars[$char])) {
                    $unicode_chars[$char]++;
                } else {
                    $unicode_chars[$char] = 1;
                }
            }

            // todo: add byte detection here
        }

        // unicode cleanup
        if ($this->_compile_unicode) {
            foreach ($unicode_chars as $utf8_char => $count) {
                $search_result = $this->_unicode_block_name(
                        $this->_utf8char2unicode($utf8_char), $blocks, $block_count);

                if ($search_result != -1) {
                    $block_name = $search_result[2];
                } else {
                    $block_name = '[Malformatted]';
                }

                if (isset($this->_unicode_blocks[$block_name])) {
                    $this->_unicode_blocks[$block_name] += $count;
                } else {
                    $this->_unicode_blocks[$block_name] = $count;
                }
            }
        }

        // trigram cleanup
        if ($this->_compile_trigram) {
            // pad the end
            if ($b != ' ') {
                if (!isset($this->_trigram["$a$b "])) {
                    $this->_trigram["$a$b "] = 1;
                } else {
                    $this->_trigram["$a$b "]++;
                }
            }

            // perl compatibility; Language::Guess does not pad the beginning
            // kludge
            // Only touch the leading trigram if the loop really counted it;
            // otherwise indexing it would raise a notice and leave a bogus
            // entry behind.
            if (isset($dropone) && isset($this->_trigram[$dropone])) {
                if ($this->_trigram[$dropone] == 1) {
                    unset($this->_trigram[$dropone]);
                } else {
                    $this->_trigram[$dropone]--;
                }
            }

            if (!empty($this->_trigram)) {
                $this->_trigram_ranks = $this->_arr_rank($this->_trigram);
            } else {
                $this->_trigram_ranks = array();
            }
        }
    }
 }
 /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
 ?>
--- a/lib/languagedetect/data/lang.dat
+++ b/lib/languagedetect/data/lang.dat
--- a/lib/languagedetect/data/unicode_blocks.dat
+++ b/lib/languagedetect/data/unicode_blocks.dat
--- a/schema/ttrss_schema_mysql.sql
+++ b/schema/ttrss_schema_mysql.sql
@ -163,6 +163,7 @@ create table ttrss_entries (id integer not null primary key auto_increment,
 	date_updated datetime not null,
 	num_comments integer not null default 0,
 	plugin_data longtext,
 	lang varchar(2),
 	comments varchar(250) not null default '',
 	author varchar(250) not null default '') ENGINE=InnoDB DEFAULT CHARSET=UTF8;
@ -301,7 +302,7 @@ create table ttrss_tags (id integer primary key auto_increment,
 create table ttrss_version (schema_version int not null) ENGINE=InnoDB DEFAULT CHARSET=UTF8;
-insert into ttrss_version values (121);
+insert into ttrss_version values (122);
 create table ttrss_enclosures (id integer primary key auto_increment,
 	content_url text not null,
--- a/schema/ttrss_schema_pgsql.sql
+++ b/schema/ttrss_schema_pgsql.sql
@ -144,6 +144,7 @@ create table ttrss_entries (id serial not null primary key,
 	num_comments integer not null default 0,
 	comments varchar(250) not null default '',
 	plugin_data text,
 	lang varchar(2),
 	author varchar(250) not null default '');
 create index ttrss_entries_guid_index on ttrss_entries(guid);
@ -259,7 +260,7 @@ create index ttrss_tags_post_int_id_idx on ttrss_tags(post_int_id);
 create table ttrss_version (schema_version int not null);
-insert into ttrss_version values (121);
+insert into ttrss_version values (122);
 create table ttrss_enclosures (id serial not null primary key,
 	content_url text not null,
--- a/schema/versions/mysql/122.sql
+++ b/schema/versions/mysql/122.sql
@ -0,0 +1,7 @@
 -- Schema migration to version 122 (MySQL): adds a nullable
 -- two-character "lang" column to ttrss_entries.
 begin;
 alter table ttrss_entries add column lang varchar(2);
 -- Record the new schema version.
 update ttrss_version set schema_version = 122;
 commit;
--- a/schema/versions/pgsql/122.sql
+++ b/schema/versions/pgsql/122.sql
@ -0,0 +1,7 @@
 -- Schema migration to version 122 (PostgreSQL): adds a nullable
 -- two-character "lang" column to ttrss_entries.
 begin;
 alter table ttrss_entries add column lang varchar(2);
 -- Record the new schema version.
 update ttrss_version set schema_version = 122;
 commit;