import math

# Corpus files to analyse; the script below compares every pair of them.
namefile = [
    "pavlo002.txt",
    "pavlo001.txt",
    "panch001.txt",
    "panch002.txt",
]

# How many of the highest-scoring marker words are kept for the second
# comparison pass (used as the slice length duka[-huha:]).
huha = 5

# printf-style template applied to float scores before they are printed.
ps1 = "%8.5f "

# Additive offset that vix() applies to both operands before dividing.
du = 0
def vix(hkw, hig, offset=None):
    """Return the ratio of the larger argument to the smaller one.

    Both values are shifted by the same additive offset before dividing,
    so the result is always >= 1 for positive inputs and equals 1.0 when
    the two values are equal.

    Args:
        hkw: First value.
        hig: Second value.
        offset: Amount added to both values before the division.  When
            omitted, the module-level ``du`` is used, which is what the
            existing two-argument callers rely on.

    Returns:
        ``(max + offset) / (min + offset)`` as a float.

    Raises:
        ZeroDivisionError: If the smaller shifted value is zero.
    """
    if offset is None:
        offset = du
    # Coerce to float so that integer inputs are not truncated by Python 2's
    # integer division.
    first = float(hkw) + offset
    second = float(hig) + offset
    if first > second:
        return first / second
    return second / first
# Character remapping table indexed by ord(char).  Entries equal to 126
# ("~") mark characters that are dropped from the output.
# NOTE(review): the layout suggests a single-byte Cyrillic recoding that also
# folds upper case to lower case -- confirm against the corpus encoding.
_VIL_TABLE = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    126, 33, 126, 35, 36, 37, 38, 39, 126, 126, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    64, 65, 66, 67, 68, 197, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 126, 92, 126, 94, 95,
    96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 201, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 218, 251, 252, 253, 254, 255,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 234, 251, 252, 253, 254, 255,
    184, 184, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
    0, 0, 0, 0,
]

# Trailing characters that are split off a word into tokens of their own.
_VIL_PUNCT = '".,-!?:;)(1234567890'


def _vil_flush(word, words):
    """Append a finished *word*, plus any stripped trailing marks, to *words*."""
    if not word:
        return
    if len(word) > 1:
        # A single ordered pass over the marks: each mark is compared once
        # against the current last character, and a stripped mark is emitted
        # BEFORE the word it came from.  The word never shrinks below one
        # character.
        for mark in _VIL_PUNCT:
            if len(word) > 1 and word[-1] == mark:
                word = word[:-1]
                words.append(mark)
    words.append(word)


def vil(vs):
    """Split one line of text into word and punctuation tokens.

    Characters are first remapped through ``_VIL_TABLE``; characters that map
    to "~" are discarded.  Words are delimited by spaces and newlines only.
    Trailing digits and punctuation listed in ``_VIL_PUNCT`` become separate
    tokens placed in front of the word they were attached to.

    Args:
        vs: The line to tokenise.  Lines shorter than three characters are
            treated as empty.

    Returns:
        A list of token strings (always a list, also for short input).

    Raises:
        IndexError: If a character's ``ord()`` lies outside the table.
    """
    if len(vs) < 3:
        return []
    words = []
    pending = []
    for ch in vs:
        if ch == " " or ch == "\n":
            _vil_flush("".join(pending), words)
            pending = []
            continue
        mapped = chr(_VIL_TABLE[ord(ch)])
        if mapped != "~":
            pending.append(mapped)
    # Emit the last word even when the line does not end with a space or
    # newline; it used to be silently dropped.
    _vil_flush("".join(pending), words)
    return words
# Per-file results, filled in by the counting loop:
# gugu[filename][key] -> value stored for that key in that file.
gugu = {}

# Zeroed counter template.  "all" and "allsl" are bookkeeping slots written by
# the counting loop; every other key is a marker word looked up among the
# tokens of a file.
# NOTE(review): "то есть" contains a space, and vil() splits on spaces, so
# that key presumably can never match a single token.
shablon = {
    "all": 0, "allsl": 0,
    "в": 0, "на": 0, "с": 0, "за": 0, "к": 0, "по": 0, "из": 0, "у": 0,
    "от": 0, "для": 0, "во": 0, "без": 0, "до": 0, "о": 0, "через": 0,
    "со": 0, "при": 0, "про": 0, "об": 0, "ко": 0, "над": 0, "из-за": 0,
    "из-под": 0, "под": 0,
    "и": 0, "что": 0, "но": 0, "а": 0, "да": 0, "хотя": 0, "когда": 0,
    "чтобы": 0, "если": 0, "тоже": 0, "или": 0, "то есть": 0, "зато": 0,
    "будто": 0,
    "не": 0, "как": 0, "же": 0, "даже": 0, "бы": 0, "ли": 0, "только": 0,
    "вот": 0, "то": 0, "ни": 0, "лишь": 0, "ведь": 0, "вон": 0,
    "то-есть": 0, "нибудь": 0, "уже": 0, "либо": 0,
}

# Key list that the later loops iterate over (and index into).
keymw = list(shablon.keys())

# Separate accumulator with the same keys as the template, all starting at 0.
rpw = dict.fromkeys(shablon, 0)
# For every corpus file: tokenise its lines, count the marker words, and store
# each count as a share of all tokens in gugu[filename].
for fname in namefile:
    # The context manager closes the handle even if reading fails.
    with open(fname, 'r') as fr:
        ss = fr.readlines()

    # Fresh zeroed counters for each file.  Binding the shared template itself
    # (mwords = shablon) made the counts of earlier files leak into later ones.
    mwords = dict.fromkeys(shablon, 0)

    p = 0    # every token returned by vil()
    pp = 0   # tokens that are keys of the counter dict
    # NOTE(review): the final line of each file is skipped, exactly as the
    # original range(len(ss)-1) did -- confirm whether that is intentional.
    for line in ss[:-1]:
        for token in vil(line):
            p = p + 1
            if token in mwords:
                mwords[token] = mwords[token] + 1
                pp = pp + 1
    # The bookkeeping slots are overwritten with the totals after counting.
    mwords["all"] = p
    mwords["allsl"] = pp

    total = float(mwords["all"])
    shares = {}
    for key in keymw:
        count = float(mwords[key])
        # Integer 0 (not 0.0) is kept for absent words so printed output
        # keeps its original form.
        shares[key] = count / total if total > 0 and count > 0 else 0
    gugu[fname] = shares
# First pairwise comparison: for every unordered pair of files accumulate a
# squared-difference score (sk) and a ratio sum (hhr) over the marker words,
# and add a per-word score to rpw as a side effect.
hula = []
for i, gnfi in enumerate(namefile):
    for gnfk in namefile[i + 1:]:
        wert = gugu[gnfk]
        gori = gugu[gnfi]
        sk = 0
        hhr = 0
        for key in keymw:
            # Skip the bookkeeping slots.  The original test
            # (key != "all" or key != "allsl") was always true, so the totals
            # were scored as if they were marker words.
            if key in ("all", "allsl"):
                continue
            hkw = wert[key]
            hig = gori[key]
            if hkw > 0 and hig > 0:
                hr = hkw / hig
                inverse = hig / hkw
                rpw[key] = rpw[key] + vix(hkw, inverse)
                # NOTE(review): this subtracts a ratio from a raw share, which
                # mixes two different quantities; kept as in the original.
                r = hkw - inverse
            else:
                # A word missing from either file contributes 0 to the ratio
                # sum and exactly 1 to the squared-difference sum.
                hr = 0
                r = 1
            sk = sk + r * r
            hhr = hhr + hr
        sqsk = math.sqrt(sk)
        if wert["allsl"] > 0 and gori["allsl"] > 0:
            fifa = wert["allsl"] / gori["allsl"]
        else:
            fifa = 0
        hula.append((fifa, str(ps1 % sqsk), str(ps1 % hhr), gnfk, gnfi))

print("=" * 70)
hula.sort()
for row in hula:
    print(row)
# List every file with its stored "allsl" value, in ascending order, framed by
# separator lines.
fifka = sorted((gugu[name]["allsl"], name) for name in namefile)
print("=" * 70)
for entry in fifka:
    print(entry)
print("~" * 70)
# Rank all rpw keys by their accumulated score and keep the `huha` highest.
# Ties are broken by the key string because (score, key) tuples are sorted.
duka = sorted((score, word) for word, score in rpw.items())
dukai = duka[-huha:]

keymw2 = []   # the selected keys, lowest score first
guska = {}    # not used anywhere in the visible code

# Print, for each selected key, the value stored for it in every file,
# scaled by 100000.
for _score, word in dukai:
    keymw2.append(word)
    for name in namefile:
        print("%s %s %s" % (word, name, gugu[name][word] * 100000))
# Second pairwise comparison, restricted to the keys selected in keymw2:
# for each unordered pair of files, sum vix() over the keys that have a
# positive value in both files, then print the pairs in ascending order.
hula2 = []
for idx, gnfi in enumerate(namefile):
    gori = gugu[gnfi]
    for gnfk in namefile[idx + 1:]:
        wert = gugu[gnfk]
        hhr = sum(
            vix(wert[word], gori[word])
            for word in keymw2
            if wert[word] > 0 and gori[word] > 0
        )
        hula2.append((hhr, gnfk, gnfi))

print("*" * 70)
hula2.sort()
for row in hula2:
    print(row)