Source code for ngram.Corpus

# -*- coding: utf-8 -*-
# The MIT License (MIT)
# (C) முத்தையா அண்ணாமலை 2013-2015

import codecs
from tamil import utf8

class Corpus:
    """A corpus data file from which only the Tamil letters are read.

    Attributes:
        filename: Path of the corpus file, stored exactly as given.
        handle: The file object most recently opened by
            ``next_tamil_letter``; ``None`` until that generator first runs.
    """

    def __init__(self, filename):
        """Remember *filename*; the file is not opened here."""
        self.filename = filename
        self.handle = None

    def __del__(self):
        """Close the last opened handle, if any, without ever raising."""
        # getattr guards against a partially constructed instance.
        handle = getattr(self, "handle", None)
        if handle is None:
            return
        try:
            handle.close()
        except Exception:
            # Best effort only: a finalizer must not propagate errors.
            pass

    def next_tamil_letter(self):
        """Yield, in file order, each letter of the corpus that is Tamil.

        The file is opened as UTF-8 when iteration starts and is read in
        full. The text is split by ``utf8.get_letters_iterable`` and only
        the letters accepted by ``utf8.istamil`` are yielded.

        The file is closed when the generator is exhausted, closed early,
        or fails.

        Raises:
            OSError: If the file cannot be opened (raised on the first
                ``next()`` call, not when the generator is created).
            UnicodeDecodeError: If the file content is not valid UTF-8.
        """
        handle = codecs.open(self.filename, 'r', 'utf-8')
        self.handle = handle
        try:
            for letter in utf8.get_letters_iterable(handle.read()):
                if utf8.istamil(letter):
                    yield letter
        finally:
            # Close through the local name: self.handle may have been
            # rebound by another call to this method in the meantime.
            handle.close()