-
Notifications
You must be signed in to change notification settings - Fork 36
/
evaluate.py
executable file
·87 lines (69 loc) · 3.18 KB
/
evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# -*- coding: utf-8 -*-
'''
Created on 4 Dec 2015
@author: Billy
'''
import os
from wvlib import wvlib
from wvlib import evalrank as eva
import sys
from mailbox import FormatError
reload(sys)
import getopt
sys.setdefaultencoding('utf8')
class CommandLine:
def __init__(self):
opts, args = getopt.getopt(sys.argv[1:],'hi:n:qf')
opts = dict(opts)
if '-h' in opts:
self.printHelp()
if '-i' in opts:
self.inputFile = opts['-i']
#self.outputFile = args[1]
else:
print >> sys.stderr, '\n*** ERROR: must specify precisely 1 arg with -i***'
self.printHelp()
def printHelp(self):
help = __doc__.replace('<PROGNAME>',sys.argv[0],1)
print >> sys.stderr, help
exit()
if __name__ =='__main__':
#filePath=os.path.join(os.path.dirname(__file__), 'w2vData', 'PubMed15_Dependancy1.txt') #PubMed-w2v.bin #PubMed-and-PMC-w2v.bin
evafilePath=[os.path.join(os.path.dirname(__file__), 'wvlib/word-similarities/MayoSRS', 'MayoSRS.txt'),
os.path.join(os.path.dirname(__file__), 'wvlib/word-similarities/UMNSRS', 'UMNSRS-sim.txt'),\
os.path.join(os.path.dirname(__file__), 'wvlib/word-similarities/UMNSRS', 'UMNSRS-rel.txt')]
from tools import utilities as util
config = CommandLine()
#from word2Vec import tools as util
if os.path.isfile(config.inputFile):
try:
wv = wvlib.load(config.inputFile).normalize()
#references = [(r, eva.read_referenceSingleWords(r)) for r in evafilePath]
references = [(r, eva.read_reference(r)) for r in evafilePath]
print '%20s\trho\tmissed\ttotal\tratio' % 'dataset'
for name, ref in references:
#rho, count = eva.evaluateTest(newWordVecs, ref,wordList)
rho, count = eva.evaluate(wv, ref)
total, miss = len(ref), len(ref) - count
print '%20s\t%.4f\t%d\t%d\t(%.2f%%)' % \
(eva.baseroot(name), rho, miss, total, 100.*miss/total)
except FormatError:
print "skip",config.inputFile
else:
folderList=util.get_filepaths(config.inputFile)
for i,item in enumerate(folderList):
filename, file_extension = os.path.splitext(item)
#print i,item
if ".DS_Store" not in item:
try:
wv = wvlib.load(item).normalize()
references = [(r, eva.read_referenceSingleWords(r)) for r in evafilePath]
print '%20s\trho\tmissed\ttotal\tratio' % 'dataset'
for name, ref in references:
#rho, count = eva.evaluateTest(newWordVecs, ref,wordList)
rho, count = eva.evaluate(wv, ref)
total, miss = len(ref), len(ref) - count
print '%20s\t%.4f\t%d\t%d\t(%.2f%%)' % \
(eva.baseroot(name), rho, miss, total, 100.*miss/total)
except FormatError:
print "skip",item