# -*- coding: utf-8 -*-
# The literals below contain non-ASCII Turkish letters.  This file is Python 2
# (it uses print statements), and Python 2 refuses to compile non-ASCII source
# unless an encoding declaration appears on line 1 or 2 (PEP 263).

# Module-level affix lists.  They are not referenced by the code visible in
# this file (SuffixBul builds its own vocabularies from inline strings), so
# they are presumably consumed elsewhere -- verify before removing.
prefixes = ['ile', 'la']
suffixes = [
    'de', 'da', "den", "dan",
    "a", "yi", "yu", "yü", "yı", "ya", "ye",
    "in", "ın", "tan", "ten", "nin", "nın", "un",
    "nden", "ndan", "yla", "yle",
    "li", "lı", "lu", "lü",
    "i", "ı",
]
from pyparsing import StringEnd, oneOf, FollowedBy, Optional, ZeroOrMore, SkipTo
# Plain-ASCII word list; presumably sample input for SuffixBul.aksiyon().
# Nothing in the visible code references it -- confirm against callers.
wordlist_suffix = [
    "insaa",
    "insaat",
    "evden",
    "kime",
    "sirasiyla",
    "baska",
    "baskasi",
    "baskasiyla",
]
class SuffixBul(object):
    """Split words into leading prefixes, a root and one trailing suffix.

    The splitting is done with a small pyparsing grammar built from two
    fixed vocabularies (see ``_PREFIX_WORDS`` and ``_SUFFIX_WORDS``).

    Attributes:
        endOfString: pyparsing ``StringEnd`` element.
        prefix: ``oneOf`` element matching any known prefix.
        suffix: ``oneOf`` element matching a known suffix that is
            immediately followed by the end of the string.
        lst: every parse result produced by ``aksiyon``, in call order.
        res: the most recent parse result (an empty list before the first
            call to ``aksiyon``).
    """

    # The vocabularies live at class level so that the parser elements and
    # the plain-string membership test in is_content() share one definition.
    # The strings are kept exactly as originally written (duplicates and
    # trailing space included); oneOf() splits them on whitespace.
    _PREFIX_WORDS = "ile ila le la dirler dır tir dirlar "
    _SUFFIX_WORDS = ("e a de dan den de da den dan a i ı yi yu yü yı ya ye "
                     "in ın tan ten nın un nden ndan yla yle li lı lu lü")
    _SUFFIX_SET = frozenset(_SUFFIX_WORDS.split())

    def __init__(self):
        """Build the parser elements and reset the result containers."""
        self.endOfString = StringEnd()
        self.prefix = oneOf(self._PREFIX_WORDS)
        self.suffix = oneOf(self._SUFFIX_WORDS) + FollowedBy(self.endOfString)
        self.lst = []
        self.res = []

    def getlist(self):
        """Return the list of parse results accumulated by ``aksiyon``."""
        return self.lst

    def getpreffix(self):
        """Return the accumulated parse results (same object as ``getlist``).

        NOTE(review): the name suggests this was meant to return only the
        prefixes, but it has always returned ``self.lst``.  That behaviour
        is kept so existing callers are unaffected -- confirm the intent.
        """
        return self.lst

    def aksiyon(self, wrdlst):
        """Parse every word in ``wrdlst`` and record the results.

        For each word the parse result is stored in ``self.res``, dumped to
        stdout together with its named parts, and appended to ``self.lst``.

        Args:
            wrdlst: iterable of word strings to analyse.
        """
        word = (ZeroOrMore(self.prefix)("PREFIXLER") +
                SkipTo(self.suffix | self.endOfString)("KaynakKelime") +
                Optional(self.suffix)("SUFFIXLER"))
        for wd in wrdlst:
            # Single-argument print(...) produces the same output under the
            # Python 2 print statement and the Python 3 print function.
            print(wd)
            self.res = word.parseString(wd)
            print(self.res.dump())
            # The parts must be read back under the names assigned in the
            # grammar above.  The previous code asked for .prefixes/.root/
            # .suffix, names the grammar never sets, so pyparsing returned
            # empty placeholders instead of the parsed parts.
            print(self.res.PREFIXLER)
            print(self.res.KaynakKelime)
            print(self.res.SUFFIXLER)
            print("")
            self.lst.append(self.res)

    def is_content(self, q):
        """Return True when ``q`` (lower-cased) is not a known suffix.

        The test is made against the suffix vocabulary strings.  The
        previous code applied ``in`` to the pyparsing expression itself,
        which is not a container of suffix strings.
        """
        return q.lower() not in self._SUFFIX_SET

    def sent(self, wrds):
        """Split the string ``wrds`` on whitespace and return the words."""
        return wrds.split()

    def filter_sent(self, wrd, wrd2):
        """Return ``[wrd]`` when ``wrd2`` is a content word, else ``[]``.

        The previous code passed the boolean result of
        ``is_content(wrd2)`` to ``filter`` as if it were the predicate
        function, which raised TypeError on every call.
        NOTE(review): this keeps the most literal reading of that
        expression (the verdict on ``wrd2`` decides whether ``wrd`` is
        kept) -- confirm that this is the intended rule.
        """
        if self.is_content(wrd2):
            return [wrd]
        return []

    def filter_sent_sent(self, wrds=""):
        """Return the words of sentence ``wrds`` that are not suffixes.

        The previous code handed the bound method ``self.sent`` to
        ``filter`` as the iterable, which raised TypeError on every call.
        The sentence is now taken as an optional argument; calling the
        method without arguments still works and returns an empty list.

        Args:
            wrds: sentence string; split on whitespace via ``sent``.
        """
        return [w for w in self.sent(wrds) if self.is_content(w)]

    def getir_res(self):
        """Return the tokens of ``self.res`` encoded as UTF-8."""
        return [x.encode("utf-8") for x in self.res]