diff --git a/dataloader.py b/dataloader.py
index 3ce9b0a..d4a1238 100644
--- a/dataloader.py
+++ b/dataloader.py
@@ -86,18 +86,19 @@ def __iter__(self):
 
 def get_dataloaders(args):
+    IMG_SIZE = 224
     train_loader, val_loader, test_loader = None, None, None
     if args.dataset == 'cifar10':
         normalize = transforms.Normalize(mean=[0.4914, 0.4824, 0.4467],
                                          std=[0.2471, 0.2435, 0.2616])
-        train_set = datasets.CIFAR10(args.data_root, train=True,
+        train_set = datasets.CIFAR10(args.data_root, train=True, download=True,
                                      transform=transforms.Compose([
                                          transforms.RandomCrop(32, padding=4),
                                          transforms.RandomHorizontalFlip(),
                                          transforms.ToTensor(),
                                          normalize
                                      ]))
-        val_set = datasets.CIFAR10(args.data_root, train=False,
+        val_set = datasets.CIFAR10(args.data_root, train=False, download=True,
                                    transform=transforms.Compose([
                                        transforms.ToTensor(),
                                        normalize
@@ -163,18 +164,14 @@ def get_dataloaders(args):
             train_loader = torch.utils.data.DataLoader(
                 train_set, batch_size=args.batch_size,
-                sampler=train_sampler,
-                num_workers=args.workers,
-                pin_memory=True)
+                sampler=train_sampler)
         if 'val' in args.splits:
             val_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_set_index[-num_sample_valid:])
             if args.distributed:
                 val_sampler = DistributedSamplerWrapper(val_sampler, shuffle=False)
             val_loader = torch.utils.data.DataLoader(
                 train_set, batch_size=args.batch_size,
-                sampler=val_sampler,
-                num_workers=args.val_workers,
-                pin_memory=True)
+                sampler=val_sampler)
         if 'test' in args.splits:
             if args.distributed:
                 test_sampler = torch.utils.data.distributed.DistributedSampler(val_set)
@@ -184,8 +181,6 @@ def get_dataloaders(args):
             test_loader = torch.utils.data.DataLoader(
                 val_set, batch_size=args.batch_size,
-                num_workers=args.val_workers,
-                pin_memory=True,
                 **additional_args)
     else:
         if 'train' in args.splits:
@@ -197,8 +192,6 @@ def get_dataloaders(args):
             train_loader = torch.utils.data.DataLoader(
                 train_set, batch_size=args.batch_size,
-                num_workers=args.workers,
-                pin_memory=True,
                 **additional_args)
         if 'val' in args.splits:
             if args.distributed:
@@ -209,8 +202,6 @@ def get_dataloaders(args):
             val_loader = torch.utils.data.DataLoader(
                 val_set, batch_size=args.batch_size,
-                num_workers=args.val_workers,
-                pin_memory=True,
                 **additional_args)
             test_loader = val_loader
diff --git a/models/dynamic_net.py b/models/dynamic_net.py
index 80d5e11..9ea5c65 100644
--- a/models/dynamic_net.py
+++ b/models/dynamic_net.py
@@ -89,7 +89,7 @@ def forward_all(self, x, stage):
         outs = self.model(x, stage)
         preds = [0]
         for i in range(len(outs)):
-            pred = (outs[i] + preds[-1]) * self.reweight[i]
+            pred = (outs[i] + preds[-1]) * self.reweight[i]  # boosting-style ensembling: previous ensemble plus this exit's output, reweighted
             preds.append(pred)
             if i == stage:
                 break
diff --git a/models/msdnet.py b/models/msdnet.py
index 13c0613..e1878a2 100644
--- a/models/msdnet.py
+++ b/models/msdnet.py
@@ -194,7 +194,7 @@ class ClassifierModule(nn.Module):
     def __init__(self, m, channel, num_classes):
         super(ClassifierModule, self).__init__()
         self.m = m
-        self.linear = nn.Linear(channel, num_classes)
+        self.linear = nn.Linear(channel, num_classes)  # each exit head ends in a single linear layer; its logits drive the early-exit decision
 
     def forward(self, x):
         res = self.m(x[-1])
@@ -335,7 +335,7 @@ def _build_classifier_imagenet(self, nIn, num_classes):
         )
         return ClassifierModule(conv, nIn, num_classes)
 
-    def forward(self, x, stage=None):
+    def forward(self, x, stage=None):  # no gradient rescaling here (cf. msdnet_ge.py)
         res = []
         for i in range(self.nBlocks):
             x = self.blocks[i](x)
diff --git a/models/msdnet_ge.py b/models/msdnet_ge.py
index 32b36b2..57ee294 100644
--- a/models/msdnet_ge.py
+++ b/models/msdnet_ge.py
@@ -373,9 +373,9 @@ def forward(self, x, stage=None):
         res = []
         for i in range(self.nBlocks):
             x = self.blocks[i](x)
-            x[-1] = gradient_rescale(x[-1], 1.0 / (self.nBlocks - i))
+            x[-1] = gradient_rescale(x[-1], 1.0 / (self.nBlocks - i))  # downscale gradients before this exit's classifier so the gradients flowing back from all remaining exits stay balanced (gradient equilibrium)
             pred, _ = self.classifier[i](x)
-            x[-1] = gradient_rescale(x[-1], (self.nBlocks - i - 1))
+            x[-1] = gradient_rescale(x[-1], (self.nBlocks - i - 1))  # undo the scaling for the (nBlocks - i - 1) exits still downstream
             res.append(pred)
             if i == stage:
                 break
diff --git a/msdnet_scripts/eval_cifar100_any.sh b/msdnet_scripts/eval_cifar100_any.sh
index 64acf26..0ef6bdc 100755
--- a/msdnet_scripts/eval_cifar100_any.sh
+++ b/msdnet_scripts/eval_cifar100_any.sh
@@ -4,7 +4,7 @@ curr_dir="$( cd "$(dirname "$0")" ; pwd -P )"
 train_id="exp0_msdge_cifar100"
 
 python3 ../eval_cifar100.py \
-    --data-root ${curr_dir}/../data/cifar100 \
+    --data-root /home/joud/code/relu_analysis/Boosted-Dynamic-Networks/data/cifar100 \
     --dataset cifar100 \
     --result_dir "${curr_dir}/../results/boostnet/$train_id" \
     --arch msdnet_ge \
diff --git a/msdnet_scripts/train_cifar100_any.sh b/msdnet_scripts/train_cifar100_any.sh
index bfe2b85..37d33c1 100755
--- a/msdnet_scripts/train_cifar100_any.sh
+++ b/msdnet_scripts/train_cifar100_any.sh
@@ -1,13 +1,13 @@
 #!/bin/bash
 curr_dir="$( cd "$(dirname "$0")" ; pwd -P )"
 
-train_id="exp0_msdge_cifar100_any"
-result_dir="${curr_dir}/../results/boostnet/$train_id"
+train_id="exp0_msdge_cifar10_any"
+result_dir="/home/joud/code/relu_analysis/Boosted-Dynamic-Networks/results/boostnet/$train_id"
 mkdir -p $result_dir
 
 python3 ../train_cifar100.py \
-    --data-root ${curr_dir}/../data/cifar100 \
-    --dataset cifar100 \
+    --data-root /home/joud/code/relu_analysis/Boosted-Dynamic-Networks/data/cifar10 \
+    --dataset cifar10 \
     --result_dir $result_dir \
     --arch msdnet_ge \
     --ensemble_reweight 0.5 \
diff --git a/requirements.txt b/requirements.txt
index 05f3bca..cb26c44 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ certifi==2021.10.8
 cffi==1.15.0
 charset-normalizer==2.0.12
 cloudpickle==2.0.0
-distro-info==0.21
+# distro-info==0.21
 docopt==0.6.2
 flatbuffers==1.12
 gast==0.4.0
@@ -18,7 +18,7 @@ h5py==3.1.0
 horovod==0.24.2
 idna==3.3
 importlib-metadata==4.11.3
-iotop==0.6
+# iotop==0.6
 keras-nightly==2.5.0.dev2021032900
 Keras-Preprocessing==1.1.2
 Markdown==3.3.6
@@ -31,27 +31,27 @@ psutil==5.9.0
 pyasn1==0.4.8
 pyasn1-modules==0.2.8
 pycparser==2.21
-pycurl==7.43.0.2
-PyGObject==3.30.4
-python-apt==1.8.4.3
+# pycurl==7.43.0.2
+# PyGObject==3.30.4
+# python-apt==1.8.4.3
 PyYAML==6.0
 requests==2.27.1
 requests-oauthlib==1.3.1
 rsa==4.8
 six==1.15.0
-tensorboard==2.5.0
-tensorboard-data-server==0.6.1
-tensorboard-plugin-wit==1.8.1
+# tensorboard==2.5.0
+# tensorboard-data-server==0.6.1
+# tensorboard-plugin-wit==1.8.1
 tensorflow==2.5.0
 tensorflow-estimator==2.5.0
-tensorrt @ file:///tensorrt-8.0.3.4-cp37-none-linux_x86_64.whl
+# tensorrt @ file:///tensorrt-8.0.3.4-cp37-none-linux_x86_64.whl
 termcolor==1.1.0
 torch==1.9.0
-torchaudio==0.10.0+cu113
+# torchaudio==0.10.0+cu113
 torchvision==0.10.0
 tqdm==4.61.1
 typing-extensions==3.7.4.3
-unattended-upgrades==0.1
+# unattended-upgrades==0.1
 urllib3==1.26.9
 Werkzeug==2.1.0
 wrapt==1.12.1
diff --git a/train_cifar100.py b/train_cifar100.py
index c4b3b4f..f486a90 100644
--- a/train_cifar100.py
+++ b/train_cifar100.py
@@ -44,7 +44,7 @@ def train(model, train_loader, optimizer, epoch, sum_writer):
     n_blocks = args.nBlocks * len(args.scale_list) if args.arch == 'ranet' else args.nBlocks
     for it, (x, y) in enumerate(train_loader):
         x, y = x.cuda(), y.cuda()
-        preds, pred_ensembles = model.forward_all(x, n_blocks - 1)
+        preds, pred_ensembles = model.forward_all(x, n_blocks - 1)  # first return value holds each exit's raw prediction, the second the ensembled predictions
         loss_all = 0
         for stage in range(n_blocks):
             # train weak learner
@@ -85,6 +85,7 @@ def main():
 
     backbone = model_func(args)
     n_flops, n_params = measure_model(backbone, 32, 32)
+    print(f'FLOPS {n_flops}')
     torch.save(n_flops, os.path.join(args.result_dir, 'flops.pth'))
     n_blocks = args.nBlocks * len(args.scale_list) if args.arch == 'ranet' else args.nBlocks
     for i in range(n_blocks):
@@ -96,7 +97,7 @@ def main():
     if args.arch == 'ranet':
         model = dynamic_net_ranet(backbone, args).cuda_all()
     else:
-        model = dynamic_net(backbone, args).cuda_all()
+        model = dynamic_net(backbone, args).cuda_all()  # MSDNet backbone
     train_loader, val_loader, _ = get_dataloaders(args)
 
     if args.arch != 'ranet':
@@ -123,6 +124,7 @@ def main():
         scheduler.load_state_dict(ckpt['scheduler'])
 
     best_accu = -1
+    val_accs = []
     for epoch in range(start_epoch, args.epochs):
         logging.info(f'epoch {epoch}')
 
@@ -133,6 +135,7 @@ def main():
         for i, accu in enumerate(accus_test):
             log_step((epoch + 1) * len(train_loader), f'stage_{i}_accu', accu, sum_writer)
+        accus_train = test(model, train_loader)
         for i, accu in enumerate(accus_train):
             log_step((epoch + 1) * len(train_loader), f'stage_{i}_accu_train', accu, sum_writer)
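
The ensembling line annotated in models/dynamic_net.py implements a boosting-style recursion: each exit's ensembled prediction is the previous ensemble plus the current exit's raw output, scaled by a reweight factor. The following is a minimal, self-contained Python sketch of that accumulation; the batch size, class count, and the per-exit reweight of 0.5 (mirroring the --ensemble_reweight 0.5 flag in the training script) are illustrative assumptions, not the repo's exact values.

import torch

# Illustrative setup: 3 exits, batch of 4, 10 classes, reweight 0.5 per exit.
outs = [torch.randn(4, 10) for _ in range(3)]  # raw logits from each exit
reweight = [0.5, 0.5, 0.5]

preds = [0]  # preds[k + 1] is the ensemble over exits 0..k
for i, out in enumerate(outs):
    # fold this exit's logits into the running ensemble, then damp the
    # result so early exits do not dominate the later ones
    preds.append((out + preds[-1]) * reweight[i])

ensemble_logits = preds[1:]  # one ensembled prediction per exit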
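
The gradient_rescale calls annotated in models/msdnet_ge.py act as the identity in the forward pass and only scale gradients in the backward pass. The diff does not show the repo's implementation; the sketch below is one common way to write such a function as a torch.autograd.Function (the class name GradientRescale is an assumption for illustration).

import torch

class GradientRescale(torch.autograd.Function):
    """Identity in the forward pass; scales the incoming gradient by a constant."""

    @staticmethod
    def forward(ctx, x, scale):
        ctx.scale = scale  # stash the constant for the backward pass
        return x

    @staticmethod
    def backward(ctx, grad_output):
        # gradient w.r.t. x is rescaled; scale is a constant and gets no gradient
        return ctx.scale * grad_output, None

gradient_rescale = GradientRescale.apply

# Usage mirroring the diff: shrink the gradient taken through exit i's
# classifier, then restore the magnitude seen by the exits still downstream.
# x = gradient_rescale(x, 1.0 / (n_blocks - i))
# x = gradient_rescale(x, n_blocks - i - 1)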