-
Notifications
You must be signed in to change notification settings - Fork 6.8k
Code modification for testcases of various network models in directory example #12498
Changes from 9 commits
b44acbf
61afc8f
39f2304
f579e22
4bd6259
23ef75f
717a943
6bc4a82
15c7907
4d48036
a3a4a41
cac0676
fd5aa86
e12170f
eb63d58
eda77a6
e4c4153
8aeba26
e55058f
0c8803f
3447dab
0fbca75
dd27ee3
485f647
e9b138a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -156,19 +156,22 @@ def get_toy_sym(teacher=True, teacher_noise_precision=None): | |
return net | ||
|
||
|
||
def dev(): | ||
return mx.gpu() | ||
def dev(xpu):
    """Return the MXNet context selected by ``xpu``.

    Parameters
    ----------
    xpu : int or None
        GPU device index to run on. ``None`` or any negative value
        (e.g. ``-1``, as accepted by the ``--gpu`` command-line option)
        selects the CPU.

    Returns
    -------
    The ``mx.cpu()`` context when no GPU is requested, otherwise the
    ``mx.gpu(xpu)`` context for the requested device index.
    """
    # Guard against None first: comparing None with an int raises
    # TypeError on Python 3, and "no GPU given" should mean CPU.
    if xpu is None or xpu < 0:
        return mx.cpu()
    # Honour the requested device index instead of always using GPU 0;
    # for the default xpu=0 this is identical to the previous mx.gpu().
    return mx.gpu(xpu)
|
||
|
||
def run_mnist_SGD(training_num=50000): | ||
X, Y, X_test, Y_test = load_mnist(training_num) | ||
minibatch_size = 100 | ||
net = get_mnist_sym() | ||
data_shape = (minibatch_size,) + X.shape[1::] | ||
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()), | ||
'softmax_label': nd.zeros((minibatch_size,), ctx=dev())} | ||
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(xpu)), | ||
'softmax_label': nd.zeros((minibatch_size,), ctx=dev(xpu))} | ||
initializer = mx.init.Xavier(factor_type="in", magnitude=2.34) | ||
exe, exe_params, _ = SGD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y, | ||
exe, exe_params, _ = SGD(sym=net, dev=dev(xpu), data_inputs=data_inputs, X=X, Y=Y, | ||
X_test=X_test, Y_test=Y_test, | ||
total_iter_num=1000000, | ||
initializer=initializer, | ||
|
@@ -180,18 +183,18 @@ def run_mnist_SGLD(training_num=50000): | |
minibatch_size = 100 | ||
net = get_mnist_sym() | ||
data_shape = (minibatch_size,) + X.shape[1::] | ||
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()), | ||
'softmax_label': nd.zeros((minibatch_size,), ctx=dev())} | ||
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(xpu)), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
'softmax_label': nd.zeros((minibatch_size,), ctx=dev(xpu))} | ||
initializer = mx.init.Xavier(factor_type="in", magnitude=2.34) | ||
exe, sample_pool = SGLD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y, | ||
exe, sample_pool = SGLD(sym=net, dev=dev(xpu), data_inputs=data_inputs, X=X, Y=Y, | ||
X_test=X_test, Y_test=Y_test, | ||
total_iter_num=1000000, | ||
initializer=initializer, | ||
learning_rate=4E-6, prior_precision=1.0, minibatch_size=100, | ||
thin_interval=100, burn_in_iter_num=1000) | ||
|
||
|
||
def run_mnist_DistilledSGLD(training_num=50000): | ||
def run_mnist_DistilledSGLD(training_num=50000, xpu=0): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This means that, by default, the example runs on mx.gpu(0). Should it default to the CPU instead, by making xpu=None the default? |
||
X, Y, X_test, Y_test = load_mnist(training_num) | ||
minibatch_size = 100 | ||
if training_num >= 10000: | ||
|
@@ -214,10 +217,10 @@ def run_mnist_DistilledSGLD(training_num=50000): | |
logsoftmax = LogSoftmax() | ||
student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden) | ||
data_shape = (minibatch_size,) + X.shape[1::] | ||
teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()), | ||
'softmax_label': nd.zeros((minibatch_size,), ctx=dev())} | ||
student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()), | ||
'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())} | ||
teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(xpu)), | ||
'softmax_label': nd.zeros((minibatch_size,), ctx=dev(xpu))} | ||
student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(xpu)), | ||
'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev(xpu))} | ||
teacher_initializer = BiasXavier(factor_type="in", magnitude=1) | ||
student_initializer = BiasXavier(factor_type="in", magnitude=1) | ||
student_exe, student_params, _ = \ | ||
|
@@ -231,17 +234,17 @@ def run_mnist_DistilledSGLD(training_num=50000): | |
teacher_learning_rate=teacher_learning_rate, | ||
student_learning_rate=student_learning_rate, | ||
teacher_prior_precision=teacher_prior, student_prior_precision=student_prior, | ||
perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev()) | ||
perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev(xpu)) | ||
|
||
|
||
def run_toy_SGLD(): | ||
def run_toy_SGLD(xpu=0): | ||
X, Y, X_test, Y_test = load_toy() | ||
minibatch_size = 1 | ||
teacher_noise_precision = 1.0 / 9.0 | ||
net = get_toy_sym(True, teacher_noise_precision) | ||
data_shape = (minibatch_size,) + X.shape[1::] | ||
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()), | ||
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())} | ||
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(xpu)), | ||
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(xpu))} | ||
initializer = mx.init.Uniform(0.07) | ||
exe, params, _ = \ | ||
SGLD(sym=net, data_inputs=data_inputs, | ||
|
@@ -253,20 +256,20 @@ def run_toy_SGLD(): | |
burn_in_iter_num=1000, | ||
thin_interval=10, | ||
task='regression', | ||
minibatch_size=minibatch_size, dev=dev()) | ||
minibatch_size=minibatch_size, dev=dev(xpu)) | ||
|
||
|
||
def run_toy_DistilledSGLD(): | ||
def run_toy_DistilledSGLD(xpu=0): | ||
X, Y, X_test, Y_test = load_toy() | ||
minibatch_size = 1 | ||
teacher_noise_precision = 1.0 | ||
teacher_net = get_toy_sym(True, teacher_noise_precision) | ||
student_net = get_toy_sym(False) | ||
data_shape = (minibatch_size,) + X.shape[1::] | ||
teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()), | ||
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())} | ||
student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev())} | ||
# 'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())} | ||
teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(xpu)), | ||
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(xpu))} | ||
student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(xpu))} | ||
# 'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev(xpu))} | ||
TaoLv marked this conversation as resolved.
Show resolved
Hide resolved
|
||
teacher_initializer = mx.init.Uniform(0.07) | ||
student_initializer = mx.init.Uniform(0.07) | ||
student_grad_f = lambda student_outputs, teacher_pred: \ | ||
|
@@ -284,21 +287,21 @@ def run_toy_DistilledSGLD(): | |
student_grad_f=student_grad_f, | ||
teacher_prior_precision=0.1, student_prior_precision=0.001, | ||
perturb_deviation=0.1, minibatch_size=minibatch_size, task='regression', | ||
dev=dev()) | ||
dev=dev(xpu)) | ||
|
||
|
||
def run_toy_HMC(): | ||
def run_toy_HMC(xpu=0): | ||
X, Y, X_test, Y_test = load_toy() | ||
minibatch_size = Y.shape[0] | ||
noise_precision = 1 / 9.0 | ||
net = get_toy_sym(True, noise_precision) | ||
data_shape = (minibatch_size,) + X.shape[1::] | ||
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()), | ||
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())} | ||
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(xpu)), | ||
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(xpu))} | ||
initializer = mx.init.Uniform(0.07) | ||
sample_pool = HMC(net, data_inputs=data_inputs, X=X, Y=Y, X_test=X_test, Y_test=Y_test, | ||
sample_num=300000, initializer=initializer, prior_precision=1.0, | ||
learning_rate=1E-3, L=10, dev=dev()) | ||
learning_rate=1E-3, L=10, dev=dev(xpu)) | ||
|
||
|
||
def run_synthetic_SGLD(): | ||
|
@@ -350,21 +353,22 @@ def run_synthetic_SGLD(): | |
help="Type of algorithm to use. 0 --> SGD, 1 --> SGLD, other-->DistilledSGLD") | ||
parser.add_argument("-t", "--training", type=int, default=50000, | ||
help="Number of training samples") | ||
parser.add_argument("--gpu", type=int, default=0, help="if -1 then use cpu else use gpu") | ||
args = parser.parse_args() | ||
training_num = args.training | ||
if args.dataset == 1: | ||
if 0 == args.algorithm: | ||
run_mnist_SGD(training_num) | ||
run_mnist_SGD(training_num, xpu=args.gpu) | ||
elif 1 == args.algorithm: | ||
run_mnist_SGLD(training_num) | ||
run_mnist_SGLD(training_num, xpu=args.gpu) | ||
else: | ||
run_mnist_DistilledSGLD(training_num) | ||
run_mnist_DistilledSGLD(training_num, xpu=args.gpu) | ||
elif args.dataset == 0: | ||
if 1 == args.algorithm: | ||
run_toy_SGLD() | ||
run_toy_SGLD(xpu=args.gpu) | ||
elif 2 == args.algorithm: | ||
run_toy_DistilledSGLD() | ||
run_toy_DistilledSGLD(xpu=args.gpu) | ||
elif 3 == args.algorithm: | ||
run_toy_HMC() | ||
run_toy_HMC(xpu=args.gpu) | ||
else: | ||
run_synthetic_SGLD() |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -129,7 +129,7 @@ def getpad(self): | |
|
||
class AnchorLoader(mx.io.DataIter): | ||
def __init__(self, roidb, batch_size, short, max_size, mean, std, | ||
feat_sym, anchor_generator: AnchorGenerator, anchor_sampler: AnchorSampler, | ||
feat_sym, anchor_generator=AnchorGenerator, anchor_sampler=AnchorSampler, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you explain? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The type of the parameter anchor_generator must be the class AnchorGenerator, so I think it should be |
||
shuffle=False): | ||
super(AnchorLoader, self).__init__() | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,7 +33,7 @@ def train_net(sym, roidb, args): | |
logger.info('called with args\n{}'.format(pprint.pformat(vars(args)))) | ||
|
||
# setup multi-gpu | ||
ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')] | ||
ctx = [mx.cpu()] if args.gpus == None else [mx.gpu(int(i)) for i in args.gpus.split(',')] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Suggestion: use `if not args.gpus` here instead of comparing with `== None`. |
||
batch_size = args.rcnn_batch_size * len(ctx) | ||
|
||
# load training data | ||
|
@@ -127,7 +127,7 @@ def parse_args(): | |
parser.add_argument('--pretrained', type=str, default='', help='path to pretrained model') | ||
parser.add_argument('--dataset', type=str, default='voc', help='training dataset') | ||
parser.add_argument('--imageset', type=str, default='', help='imageset splits') | ||
parser.add_argument('--gpus', type=str, default='0', help='gpu devices eg. 0,1') | ||
parser.add_argument('--gpus', type=str, help='gpu devices eg. 0,1 if null then use cpu') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If not provided, then use the CPU. |
||
parser.add_argument('--epochs', type=int, default=10, help='training epochs') | ||
parser.add_argument('--lr', type=float, default=0.001, help='base learning rate') | ||
parser.add_argument('--lr-decay-epoch', type=str, default='7', help='epoch to decay lr') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it need `def run_mnist_SGD(training_num=50000, xpu=None)` here?