rep.py
#encoding:utf-8
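"""
Ensemble inference script: each MultiModelAll4zhihu checkpoint listed in the
main block scores the cached test set, the sigmoid scores are summed across
checkpoints, and the top-5 predicted topics per question are written to a
submission csv (question id followed by five topic ids).
"""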
# from torch.utils import data
import torch as t
import numpy as np
from config import opt
import models
import json
import fire
import csv
import tqdm
from torch.autograd import Variable
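
# Load the cached test set: (title_char, content_char), (title_word, content_word),
# plus the row-index -> question-id mapping and the label-id -> topic-id mapping.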
def load_data(type_='char'):
    id2label = t.load(opt.id2label)
    # index2qid is stored as a pickled object array inside the .npz file
    question_d = np.load(opt.test_data_path, allow_pickle=True)
    index2qid = question_d['index2qid'].item()
    return ((question_d['title_char'], question_d['content_char']),
            (question_d['title_word'], question_d['content_word']),
            index2qid, id2label)
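
# Helper that dumps a (num_questions, 5) array of label indices to csv:
# one row per question, qid followed by its predicted topic ids.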
def write_csv(result, index2qid, labels):
    with open(opt.result_path, 'w') as f:
        csv_writer = csv.writer(f, dialect="excel")
        rows = [None] * result.shape[0]
        for i in range(result.shape[0]):
            rows[i] = [index2qid[i]] + [labels[str(int(i_))] for i_ in result[i]]
        csv_writer.writerows(rows)
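
# Forward one batch through the model and return its sigmoid scores as a numpy array.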
def dotest(model, title, content):
    # Move the two numpy input pairs to the GPU as volatile Variables
    # (inference-only, pre-0.4 PyTorch API).
    title = (Variable(t.from_numpy(title[0]).long().cuda(), volatile=True),
             Variable(t.from_numpy(title[1]).long().cuda(), volatile=True))
    content = (Variable(t.from_numpy(content[0]).long().cuda(), volatile=True),
               Variable(t.from_numpy(content[1]).long().cuda(), volatile=True))
    score = model(title, content)
    probs = t.nn.functional.sigmoid(score)
    return probs.data.cpu().numpy()
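
# Score the whole test set with a single checkpoint (loaded via opt.model_path)
# and return a (num_questions, 1999) float tensor of probabilities.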
def test_one(file, data_):
    test_data_title, test_data_content, index2qid, labels = data_
    opt.model_path = file
    model = models.MultiModelAll4zhihu(opt).cuda().eval()
    num = len(test_data_title[0])
    result = np.zeros((num, 1999))
    # Score the test set batch by batch; the final, possibly smaller, batch is included.
    for start in tqdm.tqdm(range(0, num, opt.batch_size)):
        end = min(start + opt.batch_size, num)
        title = (np.array(test_data_title[0][start:end]),
                 np.array(test_data_title[1][start:end]))
        content = (np.array(test_data_content[0][start:end]),
                   np.array(test_data_content[1][start:end]))
        result[start:end, :] = dotest(model, title, content)
    # t.save(t.from_numpy(result).float(), opt.result_path)
    return t.from_numpy(result).float()
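
# Sum the scores of the listed checkpoints, take the top-5 topics per question
# and write the submission csv.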
if __name__ == '__main__':
    # Change these paths if your files are stored elsewhere
    opt.result_path = 'submission.csv'
    opt.test_data_path = 'test.npz'
    opt.id2label = 'id2label.json'
    data_ = load_data(type_='all')
    test_data_title, test_data_content, index2qid, labels = data_
    label2qid = labels

    # Change these paths if the checkpoints are stored elsewhere
    model_paths = [
        'MultiModelAll_word_0.416523282174',
        'MultiModelAll_word_0.417185977233',
        'MultiModelAll_word_0.419866393964',
        'MultiModelAll_word_0.421331405593',
        'MultiModelAll_word_0.421692025324',
        'MultiModelAll_word_0.423535867989',
        'MultiModelAll_word_0.419245894992'
    ]

    # Ensemble: sum the sigmoid scores of every checkpoint
    result = 0
    for model_path in model_paths:
        result += test_one(model_path, data_)
    # Indices of the top-5 labels for each question
    result_labels = result.topk(5, 1)[1]

    # Write the submission csv: one row per question, qid followed by five topic ids
    rows = [None] * result.size(0)
    for ii, item in enumerate(result_labels):
        rows[ii] = [index2qid[ii]] + [label2qid[str(int(idx))] for idx in item]
    with open(opt.result_path, 'w') as f:
        writer = csv.writer(f)
        writer.writerows(rows)