65890e81f410253c5fed1941a113bea23d814c0e
[Utf8Splitter.git] / txtng3normalizer.py
1 from zope.interface import implements
2 from zopyx.txng3.core.interfaces.normalizer import INormalizer
3 from Utf8Splitter import Utf8Utils
4
5
class _Normalizer(object):
    """Normalizer declared as implementing ``INormalizer``.

    Every method delegates to ``Utf8Utils`` from the ``Utf8Splitter``
    package; none of them keeps state on the instance.
    """

    # Class-advice style declaration: it has to stay inside the class body.
    implements(INormalizer)

    def translationTable(self, language):
        """Return the ``(key, value)`` pairs held in ``Utf8Utils._cache``.

        ``language`` is accepted for interface compatibility but is not
        consulted: the same pairs are returned whatever its value.
        NOTE(review): the pairs are presumably (from_str, repl_str)
        replacements -- confirm against ``Utf8Utils``.
        """
        table = Utf8Utils._cache
        return table.items()

    def process(self, words, language):
        """Return ``words`` as transformed by ``Utf8Utils.udesacc``.

        ``words`` is handed to ``Utf8Utils.udesacc`` as-is and the result
        is returned unmodified.  ``language`` is accepted for interface
        compatibility but is not consulted.
        NOTE(review): whether a sequence of words is supported (and not
        only a single word) depends on ``udesacc`` -- confirm there.
        """
        normalized = Utf8Utils.udesacc(words)
        return normalized

    def availableLanguages(self):
        """Return the constant string ``"all"``."""
        languages = "all"
        return languages


# Module-level instance of the normalizer, created once at import time.
Normalizer = _Normalizer()