Add more common English words to the Bayes ignore list

This commit is contained in:
Andrew Dolgov 2015-06-18 08:42:17 +03:00
parent 5613bb3584
commit ef7395f170
1 changed files with 12 additions and 1 deletions

View File

@@ -226,7 +226,18 @@
@return array ignore list
*/
function getIgnoreList() {
	// Returns the list of words to ignore, as a flat array of strings.
	//
	// The entries are common English words taken from
	// https://en.wikipedia.org/wiki/Most_common_words_in_English
	//
	// This must be the only return statement in the function: an earlier
	// return of a shorter list would leave the extended list unreachable.
	//
	// NOTE(review): 'I' is the only capitalised entry. If callers lower-case
	// tokens before looking them up here it can never match - confirm against
	// the tokenizer before changing it.
	return array('the', 'be', 'to', 'of', 'and', 'a', 'in', 'that', 'have', 'I', 'it', 'for', 'not', 'on', 'with',
		'he', 'as', 'you', 'do', 'at', 'this', 'but', 'his', 'by', 'from', 'they', 'we', 'say', 'her',
		'she', 'or', 'an', 'will', 'my', 'one', 'all', 'would', 'there', 'their', 'what', 'so', 'up',
		'out', 'if', 'about', 'who', 'get', 'which', 'go', 'me', 'when', 'make', 'can', 'like', 'time',
		'no', 'just', 'him', 'know', 'take', 'people', 'into', 'year', 'your', 'good', 'some', 'could',
		'them', 'see', 'other', 'than', 'then', 'now', 'look', 'only', 'come', 'its', 'over', 'think',
		'also', 'back', 'after', 'use', 'two', 'how', 'our', 'work', 'first', 'well', 'way', 'even',
		'new', 'want', 'because', 'any', 'these', 'give', 'day', 'most', 'us', 'read', 'more');
}
/** get the tokens from a string