我怎样才能拥有Normalizer方法?

时间:2019-01-04 18:02:32

标签: python-3.x

我想规范一个波斯语列表。但是我无法达到理想的结果。 该功能从文本和单词中删除不必要的字符。

normal = [""","»","http://","www.",
"cloob","instagram","https://","t.me","هه","اا","یی","خخ","وو",
"مم","ـــ","؟؟","!!",":)",":((",":))",
"-*","=))","‌","…","∞","غغ","جج",":-*","نن","‏",
"..."," ","دد",":-*","@};-","کک"]

def normalizer(mylst):
n = ""
retrnlst = []
for snt in mylst:
    #print(snt)
    for n in normal:
        for n in snt:
            if (n=="http://"):
                snt = snt.replace(n, "http:// ")
                retrnlst.append(snt)
            elif (n=="www."):
                snt = snt.replace(n, "www. ")
                retrnlst.append(snt)
            elif (n=="cloob"):
                snt = snt.replace(n, "cloob ")
                retrnlst.append(snt)
            elif (n=="instagram"):
                snt = snt.replace(n, "instagram ")
                retrnlst.append(snt)
            elif (n=="https://"):
                snt = snt.replace(n, "https:// ")
                retrnlst.append(snt)
            elif (n=="t.me"):
                snt = snt.replace(n, "t.me ")
                retrnlst.append(snt)
            elif (n=="هه"):
                snt = snt.replace(n, "ه")
                retrnlst.append(snt)
            elif (n=="اا"):
                snt = snt.replace(n, "ا")
                retrnlst.append(snt)
            elif (n=="يي"):
                snt = snt.replace(n, "ي")
                retrnlst.append(snt)
            elif (n=="خخ"):
                snt = snt.replace(n, "خ")
                retrnlst.append(snt)
            elif (n=="وو"):
                snt = snt.replace(n, "و")
                retrnlst.append(snt)
            elif (n=="مم"):
                snt = snt.replace(n, "م")
                retrnlst.append(snt)
            elif (n=="غغ"):
                snt = snt.replace(n, "غ")
                retrnlst.append(snt)
            elif (n=="نن"):
                snt = snt.replace(n, "ن")
                retrnlst.append(snt)
            elif (n=="دد"):
                snt = snt.replace(n, "د")
                retrnlst.append(snt)
            elif (n=="کک"):
                snt = snt.replace(n, "ک")
                retrnlst.append(snt)
            elif (n=="گگ"):
                snt = snt.replace(n, "گ")
                retrnlst.append(snt)
            elif (n=="فف"):
                snt = snt.replace(n, "ف")
                retrnlst.append(snt)
            elif (n=="جج"):
                snt = snt.replace(n, "ج")
                retrnlst.append(snt)
            else:
                snt = snt.replace(j, "")  
          retrnlst.append(snt)
return retrnlst

##my input string    
var="منننننننن دوستتتتتتتتت دااااااااارممممممممممممم عشقم http://»"
var=normalizer(var)
print(var)

理想的结果是:“ http://مندوستدارمعشقم” 但这是代码的结果:

  

['م','ن','ن','ن','ن','ن','ن','ن','ن',',','د','و' ,'س','ت','ت','ت','ت','ت','ت','ت','ت','ت','','د','ا ','ا','ا','ا','ا','ا','ا','ا','ا','ر','م','م','م', 'م','م','م','م','م','م','م','م','م','م',','ع','ش' ,'ق','م',',',','h','t','t','p',':','/','/','w','w' ,'w','。','&','r','a','q','u','o',';']

0 个答案:

没有答案