diff --git a/fluid/PaddleCV/deeplabv3+/.gitignore b/fluid/PaddleCV/deeplabv3+/.gitignore index d086de2daf..cfe4708603 100644 --- a/fluid/PaddleCV/deeplabv3+/.gitignore +++ b/fluid/PaddleCV/deeplabv3+/.gitignore @@ -1,3 +1,6 @@ -deeplabv3plus_xception65_initialize.params -deeplabv3plus.params -deeplabv3plus.tar.gz +*.tgz +deeplabv3plus_gn_init* +deeplabv3plus_xception65_initialize* +*.log +*.sh +output* diff --git a/fluid/PaddleCV/deeplabv3+/README.md b/fluid/PaddleCV/deeplabv3+/README.md index b9990a2084..1938a55487 100644 --- a/fluid/PaddleCV/deeplabv3+/README.md +++ b/fluid/PaddleCV/deeplabv3+/README.md @@ -72,20 +72,19 @@ python train.py --help 以上命令用于测试训练过程是否正常,仅仅迭代了50次并且使用了1的batch size,如果需要复现 原论文的实验,请使用以下设置: ``` +CUDA_VISIBLE_DEVICES=0 \ python ./train.py \ - --batch_size=8 \ + --batch_size=4 \ --parallel=True \ --norm_type=gn \ --train_crop_size=769 \ - --total_step=90000 \ + --total_step=500000 \ --base_lr=0.001 \ --init_weights_path=deeplabv3plus_gn_init \ --save_weights_path=output \ --dataset_path=$DATASET_PATH ``` -如果您的显存不足,可以尝试减小`batch_size`,同时等比例放大`total_step`, 保证相乘的值不变,这得益于Group Norm的特性,改变 `batch_size` 并不会显著影响结果,而且能够节约更多显存, 比如您可以设置`--batch_size=4 --total_step=180000`。 - -如果您希望使用多卡进行训练,可以同比增加`batch_size`,减小`total_step`, 比如原来单卡训练是`--batch_size=4 --total_step=180000`,使用4卡训练则是`--batch_size=16 --total_step=45000` +如果您的显存不足,可以尝试减小`batch_size`,同时等比例放大`total_step`, 缩小`base_lr`, 保证相乘的值不变,这得益于Group Norm的特性,改变 `batch_size` 并不会显著影响结果,而且能够节约更多显存, 比如您可以设置`--batch_size=2 --total_step=1000000 --base_lr=0.0005`。 ### 测试 执行以下命令在`Cityscape`测试数据集上进行测试: @@ -110,7 +109,6 @@ step: 500, mIoU: 0.7881 |数据集 | norm type | pretrained model | trained model | mean IoU |---|---|---|---|---| -|CityScape | batch norm | [deeplabv3plus_xception65_initialize.tgz](https://paddle-deeplab.bj.bcebos.com/deeplabv3plus_xception65_initialize.tgz) | [deeplabv3plus.tgz](https://paddle-deeplab.bj.bcebos.com/deeplabv3plus.tgz) | 0.7873 | |CityScape | group norm | 
[deeplabv3plus_gn_init.tgz](https://paddle-deeplab.bj.bcebos.com/deeplabv3plus_gn_init.tgz) | [deeplabv3plus_gn.tgz](https://paddle-deeplab.bj.bcebos.com/deeplabv3plus_gn.tgz) | 0.7881 | ## 参考 diff --git a/fluid/PaddleCV/deeplabv3+/eval.py b/fluid/PaddleCV/deeplabv3+/eval.py index 4620dd5d7c..7d1f15cc73 100644 --- a/fluid/PaddleCV/deeplabv3+/eval.py +++ b/fluid/PaddleCV/deeplabv3+/eval.py @@ -137,7 +137,4 @@ def load_model(): all_correct = right.copy() mp = (wrong + right) != 0 miou2 = np.mean((right[mp] * 1.0 / (right[mp] + wrong[mp]))) - if args.verbose: - print('step: %s, mIoU: %s' % (i + 1, miou2), flush=True) - else: - print('\rstep: %s, mIoU: %s' % (i + 1, miou2), end='\r', flush=True) + print('step: %s, mIoU: %s' % (i + 1, miou2)) diff --git a/fluid/PaddleCV/deeplabv3+/reader.py b/fluid/PaddleCV/deeplabv3+/reader.py index d420f0a264..a660f924c3 100644 --- a/fluid/PaddleCV/deeplabv3+/reader.py +++ b/fluid/PaddleCV/deeplabv3+/reader.py @@ -9,7 +9,7 @@ default_config = { "shuffle": True, "min_resize": 0.5, - "max_resize": 2, + "max_resize": 4, "crop_size": 769, } @@ -90,9 +90,21 @@ def get_img(self): break if shape == -1: return img, label, ln - random_scale = np.random.rand(1) * (self.config['max_resize'] - - self.config['min_resize'] - ) + self.config['min_resize'] + + if np.random.rand() > 0.5: + range_l = 1 + range_r = self.config['max_resize'] + else: + range_l = self.config['min_resize'] + range_r = 1 + + if np.random.rand() > 0.5: + assert len(img.shape) == 3 and len( + label.shape) == 3, "{} {}".format(img.shape, label.shape) + img = img[:, :, ::-1] + label = label[:, :, ::-1] + + random_scale = np.random.rand(1) * (range_r - range_l) + range_l crop_size = int(shape / random_scale) bb = crop_size // 2 diff --git a/fluid/PaddleCV/deeplabv3+/train.py b/fluid/PaddleCV/deeplabv3+/train.py index 799a1900f5..65b4cb8d49 100755 --- a/fluid/PaddleCV/deeplabv3+/train.py +++ b/fluid/PaddleCV/deeplabv3+/train.py @@ -21,10 +21,10 @@ add_arg = lambda *args: 
utility.add_arguments(*args, argparser=parser) # yapf: disable -add_arg('batch_size', int, 2, "The number of images in each batch during training.") +add_arg('batch_size', int, 4, "The number of images in each batch during training.") add_arg('train_crop_size', int, 769, "Image crop size during training.") -add_arg('base_lr', float, 0.0001, "The base learning rate for model training.") -add_arg('total_step', int, 90000, "Number of the training step.") +add_arg('base_lr', float, 0.001, "The base learning rate for model training.") +add_arg('total_step', int, 500000, "Number of the training step.") add_arg('init_weights_path', str, None, "Path of the initial weights in paddlepaddle format.") add_arg('save_weights_path', str, None, "Path of the saved weights during training.") add_arg('dataset_path', str, None, "Cityscape dataset path.") @@ -39,7 +39,7 @@ parser.add_argument( '--enable_ce', action='store_true', - help='If set, run the task with continuous evaluation logs.') + help='If set, run the task with continuous evaluation logs. Users can ignore this argument.') #yapf: enable @contextlib.contextmanager @@ -87,7 +87,8 @@ def loss(logit, label): label = fluid.layers.reshape(label, [-1, 1]) label = fluid.layers.cast(label, 'int64') label_nignore = fluid.layers.reshape(label_nignore, [-1, 1]) - loss = fluid.layers.softmax_with_cross_entropy(logit, label, ignore_index=255, numeric_stable_mode=True) + logit = fluid.layers.softmax(logit, use_cudnn=False) + loss = fluid.layers.cross_entropy(logit, label, ignore_index=255) label_nignore.stop_gradient = True label.stop_gradient = True return loss, label_nignore