Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Code modification for testcases of various network models in directory example #12498

Merged
merged 25 commits into from
Jan 11, 2019
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 38 additions & 34 deletions example/bayesian-methods/bdk_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,19 +156,22 @@ def get_toy_sym(teacher=True, teacher_noise_precision=None):
return net


def dev():
return mx.gpu()
def dev(xpu):
    """Return the MXNet context selected by ``xpu``.

    Parameters
    ----------
    xpu : int or None
        GPU device id. ``None`` or a negative value selects the CPU.

    Returns
    -------
    The context built by ``mx.gpu(xpu)`` for a non-negative id,
    otherwise the one built by ``mx.cpu()``.
    """
    # Test None first: comparing None with an int raises TypeError on
    # Python 3, and None is a natural "no GPU requested" default for callers.
    if xpu is None or xpu < 0:
        return mx.cpu()
    # Pass the id through so that e.g. --gpu 1 really selects GPU 1.
    return mx.gpu(xpu)


def run_mnist_SGD(training_num=50000):
X, Y, X_test, Y_test = load_mnist(training_num)
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(xpu)),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does it need def run_mnist_SGD(training_num=50000, xpu=None) here?

'softmax_label': nd.zeros((minibatch_size,), ctx=dev(xpu))}
initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
exe, exe_params, _ = SGD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
exe, exe_params, _ = SGD(sym=net, dev=dev(xpu), data_inputs=data_inputs, X=X, Y=Y,
X_test=X_test, Y_test=Y_test,
total_iter_num=1000000,
initializer=initializer,
Expand All @@ -180,18 +183,18 @@ def run_mnist_SGLD(training_num=50000):
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(xpu)),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

def run_mnist_SGLD(training_num=50000, xpu=None) here?

'softmax_label': nd.zeros((minibatch_size,), ctx=dev(xpu))}
initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
exe, sample_pool = SGLD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
exe, sample_pool = SGLD(sym=net, dev=dev(xpu), data_inputs=data_inputs, X=X, Y=Y,
X_test=X_test, Y_test=Y_test,
total_iter_num=1000000,
initializer=initializer,
learning_rate=4E-6, prior_precision=1.0, minibatch_size=100,
thin_interval=100, burn_in_iter_num=1000)


def run_mnist_DistilledSGLD(training_num=50000):
def run_mnist_DistilledSGLD(training_num=50000, xpu=0):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This means, by default, mx.gpu(0). Should it default to CPU instead by making xpu=None?

X, Y, X_test, Y_test = load_mnist(training_num)
minibatch_size = 100
if training_num >= 10000:
Expand All @@ -214,10 +217,10 @@ def run_mnist_DistilledSGLD(training_num=50000):
logsoftmax = LogSoftmax()
student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden)
data_shape = (minibatch_size,) + X.shape[1::]
teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(xpu)),
'softmax_label': nd.zeros((minibatch_size,), ctx=dev(xpu))}
student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(xpu)),
'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev(xpu))}
teacher_initializer = BiasXavier(factor_type="in", magnitude=1)
student_initializer = BiasXavier(factor_type="in", magnitude=1)
student_exe, student_params, _ = \
Expand All @@ -231,17 +234,17 @@ def run_mnist_DistilledSGLD(training_num=50000):
teacher_learning_rate=teacher_learning_rate,
student_learning_rate=student_learning_rate,
teacher_prior_precision=teacher_prior, student_prior_precision=student_prior,
perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev())
perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev(xpu))


def run_toy_SGLD():
def run_toy_SGLD(xpu=0):
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0 / 9.0
net = get_toy_sym(True, teacher_noise_precision)
data_shape = (minibatch_size,) + X.shape[1::]
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(xpu)),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(xpu))}
initializer = mx.init.Uniform(0.07)
exe, params, _ = \
SGLD(sym=net, data_inputs=data_inputs,
Expand All @@ -253,20 +256,20 @@ def run_toy_SGLD():
burn_in_iter_num=1000,
thin_interval=10,
task='regression',
minibatch_size=minibatch_size, dev=dev())
minibatch_size=minibatch_size, dev=dev(xpu))


def run_toy_DistilledSGLD():
def run_toy_DistilledSGLD(xpu=0):
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0
teacher_net = get_toy_sym(True, teacher_noise_precision)
student_net = get_toy_sym(False)
data_shape = (minibatch_size,) + X.shape[1::]
teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev())}
# 'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(xpu)),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(xpu))}
student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(xpu))}
# 'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev(xpu))}
TaoLv marked this conversation as resolved.
Show resolved Hide resolved
teacher_initializer = mx.init.Uniform(0.07)
student_initializer = mx.init.Uniform(0.07)
student_grad_f = lambda student_outputs, teacher_pred: \
Expand All @@ -284,21 +287,21 @@ def run_toy_DistilledSGLD():
student_grad_f=student_grad_f,
teacher_prior_precision=0.1, student_prior_precision=0.001,
perturb_deviation=0.1, minibatch_size=minibatch_size, task='regression',
dev=dev())
dev=dev(xpu))


def run_toy_HMC():
def run_toy_HMC(xpu=0):
X, Y, X_test, Y_test = load_toy()
minibatch_size = Y.shape[0]
noise_precision = 1 / 9.0
net = get_toy_sym(True, noise_precision)
data_shape = (minibatch_size,) + X.shape[1::]
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(xpu)),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(xpu))}
initializer = mx.init.Uniform(0.07)
sample_pool = HMC(net, data_inputs=data_inputs, X=X, Y=Y, X_test=X_test, Y_test=Y_test,
sample_num=300000, initializer=initializer, prior_precision=1.0,
learning_rate=1E-3, L=10, dev=dev())
learning_rate=1E-3, L=10, dev=dev(xpu))


def run_synthetic_SGLD():
Expand Down Expand Up @@ -350,21 +353,22 @@ def run_synthetic_SGLD():
help="Type of algorithm to use. 0 --> SGD, 1 --> SGLD, other-->DistilledSGLD")
parser.add_argument("-t", "--training", type=int, default=50000,
help="Number of training samples")
parser.add_argument("--gpu", type=int, default=0, help="if -1 then use cpu else use gpu")
args = parser.parse_args()
training_num = args.training
if args.dataset == 1:
if 0 == args.algorithm:
run_mnist_SGD(training_num)
run_mnist_SGD(training_num, xpu=args.gpu)
elif 1 == args.algorithm:
run_mnist_SGLD(training_num)
run_mnist_SGLD(training_num, xpu=args.gpu)
else:
run_mnist_DistilledSGLD(training_num)
run_mnist_DistilledSGLD(training_num, xpu=args.gpu)
elif args.dataset == 0:
if 1 == args.algorithm:
run_toy_SGLD()
run_toy_SGLD(xpu=args.gpu)
elif 2 == args.algorithm:
run_toy_DistilledSGLD()
run_toy_DistilledSGLD(xpu=args.gpu)
elif 3 == args.algorithm:
run_toy_HMC()
run_toy_HMC(xpu=args.gpu)
else:
run_synthetic_SGLD()
3 changes: 2 additions & 1 deletion example/fcn-xs/fcn_xs.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@

logger = logging.getLogger()
logger.setLevel(logging.INFO)
ctx = mx.gpu(0)

def main():
ctx = mx.gpu(0) if args.gpu >=0 else mx.cpu()
fcnxs = symbol_fcnxs.get_fcn32s_symbol(numclass=21, workspace_default=1536)
fcnxs_model_prefix = "model_pascal/FCN32s_VGG16"
if args.model == "fcn16s":
Expand Down Expand Up @@ -85,6 +85,7 @@ def main():
help='the init type of fcn-xs model, e.g. vgg16, fcnxs')
parser.add_argument('--retrain', action='store_true', default=False,
help='true means continue training.')
parser.add_argument("--gpu", type=int, default=0, help="if negative then use cpu else use gpu")
args = parser.parse_args()
logging.info(args)
main()
62 changes: 36 additions & 26 deletions example/multi-task/example_multi_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
import numpy as np
import logging
import time
import argparse

logging.basicConfig(level=logging.DEBUG)


def build_network():
data = mx.symbol.Variable('data')
fc1 = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=128)
Expand Down Expand Up @@ -131,29 +133,37 @@ def get_name_value(self):
name, value = self.get()
return list(zip(name, value))


batch_size=100
num_epochs=100
device = mx.gpu(0)
lr = 0.01

network = build_network()
train, val = get_mnist_iterator(batch_size=batch_size, input_shape = (784,))
train = Multi_mnist_iterator(train)
val = Multi_mnist_iterator(val)


model = mx.mod.Module(
context = device,
symbol = network,
label_names = ('softmax1_label', 'softmax2_label'))

model.fit(
train_data = train,
eval_data = val,
eval_metric = Multi_Accuracy(num=2),
num_epoch = num_epochs,
optimizer_params = (('learning_rate', lr), ('momentum', 0.9), ('wd', 0.00001)),
initializer = mx.init.Xavier(factor_type="in", magnitude=2.34),
batch_end_callback = mx.callback.Speedometer(batch_size, 50))

if __name__ == '__main__':
    # Seed the NumPy and MXNet random number generators with a fixed value.
    np.random.seed(100)
    mx.random.seed(100)

    parser = argparse.ArgumentParser(
        description="This is a simple example to show how to use mxnet for multi-task learning. "
                    "It uses MNIST as an example and mocks up the multi-label task")
    parser.add_argument("--batch_size", type=int, default=100, help="Batch size parameter")
    parser.add_argument("--num_epochs", type=int, default=100, help="number of epochs")
    parser.add_argument("--gpu", type=int, default=0,
                        help="GPU device id (zero or positive); a negative value selects the CPU")
    parser.add_argument("--lr", type=float, default=0.01, help="learning rate parameter")

    args = parser.parse_args()
    batch_size = args.batch_size
    num_epochs = args.num_epochs
    lr = args.lr
    # Honor the requested device id instead of always binding to GPU 0.
    device = mx.gpu(args.gpu) if args.gpu >= 0 else mx.cpu()

    network = build_network()
    train, val = get_mnist_iterator(batch_size=batch_size, input_shape=(784,))
    # Both iterators are wrapped by Multi_mnist_iterator before training.
    train = Multi_mnist_iterator(train)
    val = Multi_mnist_iterator(val)

    # The module is bound to two label inputs, one per softmax output.
    model = mx.mod.Module(
        context=device,
        symbol=network,
        label_names=('softmax1_label', 'softmax2_label'))

    model.fit(
        train_data=train,
        eval_data=val,
        eval_metric=Multi_Accuracy(num=2),
        num_epoch=num_epochs,
        optimizer_params=(('learning_rate', lr), ('momentum', 0.9), ('wd', 0.00001)),
        initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
        batch_end_callback=mx.callback.Speedometer(batch_size, 50))
2 changes: 1 addition & 1 deletion example/rcnn/symdata/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def getpad(self):

class AnchorLoader(mx.io.DataIter):
def __init__(self, roidb, batch_size, short, max_size, mean, std,
feat_sym, anchor_generator: AnchorGenerator, anchor_sampler: AnchorSampler,
feat_sym, anchor_generator=AnchorGenerator, anchor_sampler=AnchorSampler,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The type of the parameter anchor_generator must be the class AnchorGenerator, so I think it should be `:`, not `=`. I will make the corresponding changes.

shuffle=False):
super(AnchorLoader, self).__init__()

Expand Down
4 changes: 2 additions & 2 deletions example/rcnn/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_net(sym, imdb, args):
logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))

# setup context
ctx = mx.gpu(args.gpu)
ctx = mx.gpu(args.gpu) if args.gpu >=0 else mx.cpu()

# load testing data
test_data = TestLoader(imdb.roidb, batch_size=1, short=args.img_short_side, max_size=args.img_long_side,
Expand Down Expand Up @@ -94,7 +94,7 @@ def parse_args():
parser.add_argument('--params', type=str, default='', help='path to trained model')
parser.add_argument('--dataset', type=str, default='voc', help='training dataset')
parser.add_argument('--imageset', type=str, default='', help='imageset splits')
parser.add_argument('--gpu', type=int, default=0, help='gpu device eg. 0')
parser.add_argument('--gpu', type=int, default=0, help='gpu device eg. 0 if negative then use cpu')
# faster rcnn params
parser.add_argument('--img-short-side', type=int, default=600)
parser.add_argument('--img-long-side', type=int, default=1000)
Expand Down
4 changes: 2 additions & 2 deletions example/rcnn/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def train_net(sym, roidb, args):
logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))

# setup multi-gpu
ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')]
ctx = [mx.cpu()] if args.gpus == None else [mx.gpu(int(i)) for i in args.gpus.split(',')]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if not args.gpus

batch_size = args.rcnn_batch_size * len(ctx)

# load training data
Expand Down Expand Up @@ -127,7 +127,7 @@ def parse_args():
parser.add_argument('--pretrained', type=str, default='', help='path to pretrained model')
parser.add_argument('--dataset', type=str, default='voc', help='training dataset')
parser.add_argument('--imageset', type=str, default='', help='imageset splits')
parser.add_argument('--gpus', type=str, default='0', help='gpu devices eg. 0,1')
parser.add_argument('--gpus', type=str, help='gpu devices eg. 0,1 if null then use cpu')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If not provided, then use CPU.

parser.add_argument('--epochs', type=int, default=10, help='training epochs')
parser.add_argument('--lr', type=float, default=0.001, help='base learning rate')
parser.add_argument('--lr-decay-epoch', type=str, default='7', help='epoch to decay lr')
Expand Down
13 changes: 10 additions & 3 deletions example/rnn-time-major/rnn_cell_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import os
import numpy as np
import mxnet as mx
import argparse

from bucket_io import BucketSentenceIter, default_build_vocab

Expand Down Expand Up @@ -79,7 +80,14 @@ def Perplexity(label, pred):


if __name__ == '__main__':
batch_size = 128
# Command-line options: batch size plus CPU/GPU device selection.
parser = argparse.ArgumentParser(
    description="This example demonstrates an RNN implementation with Time-major layout. This implementation shows 1.5x-2x speedups compared to Batch-major RNN.")
parser.add_argument("--batch_size", type=int, default=128, help="Batch size parameter")
# --cuda and --no-cuda write to the same destination, args.cuda.
parser.add_argument('--cuda', action='store_true', dest='cuda', help='train on GPU with CUDA')
parser.add_argument('--no-cuda', action='store_false', dest='cuda', help='train on CPU')
# The value is kept as a string so that several ids can be given at once.
parser.add_argument('--device-id', type=str, default='0',
                    help='comma-separated GPU device ids to use with --cuda, e.g. 0,1')
args = parser.parse_args()
batch_size = args.batch_size
buckets = [10, 20, 30, 40, 50, 60]
num_hidden = 200
num_embed = 200
Expand All @@ -90,8 +98,7 @@ def Perplexity(label, pred):
momentum = 0.0

# Update count per available GPUs
gpu_count = 1
contexts = [mx.context.gpu(i) for i in range(gpu_count)]
# argparse exposes '--device-id' as args.device_id (dashes in option names
# become underscores); the value is a comma-separated list of GPU ids.
contexts = [mx.context.gpu(int(i)) for i in args.device_id.split(',')] if args.cuda else [mx.context.cpu()]

vocab = default_build_vocab(os.path.join(data_dir, 'sherlockholmes.train.txt'))

Expand Down