From f6c49523d2359ee598a8ba1793a8e958b52c20ca Mon Sep 17 00:00:00 2001 From: nihui Date: Tue, 25 May 2021 23:39:26 +0800 Subject: [PATCH] fix convolutiondepthwise 3x3 int8 pack8, fix #2952 --- src/layer/arm/convolutiondepthwise_3x3_pack8_int8.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/layer/arm/convolutiondepthwise_3x3_pack8_int8.h b/src/layer/arm/convolutiondepthwise_3x3_pack8_int8.h index 60c014a9f2fb..47714a431402 100644 --- a/src/layer/arm/convolutiondepthwise_3x3_pack8_int8.h +++ b/src/layer/arm/convolutiondepthwise_3x3_pack8_int8.h @@ -36,7 +36,7 @@ static void convdw3x3s1_pack8_int8_neon(const Mat& bottom_blob, Mat& top_blob, c const signed char* r0 = img0.row(0); const signed char* r1 = img0.row(1); const signed char* r2 = img0.row(2); - const signed char* r3 = img0.row(2); + const signed char* r3 = img0.row(3); int8x8_t _k00 = vld1_s8(k0); int8x8_t _k01 = vld1_s8(k0 + 8); @@ -149,6 +149,7 @@ static void convdw3x3s1_pack8_int8_neon(const Mat& bottom_blob, Mat& top_blob, c r0 += 16; r1 += 16; r2 += 16; + r3 += 16; outptr0 += 16; outptr1 += 16; } @@ -220,6 +221,9 @@ static void convdw3x3s1_pack8_int8_neon(const Mat& bottom_blob, Mat& top_blob, c r1 += 2 * 8 + w * 8; r2 += 2 * 8 + w * 8; r3 += 2 * 8 + w * 8; + + outptr0 += outw * 8; + outptr1 += outw * 8; } for (; i < outh; i++) { @@ -422,7 +426,7 @@ static void convdw3x3s2_pack8_int8_neon(const Mat& bottom_blob, Mat& top_blob, c vst1q_s32(outptr0, _sum00); vst1q_s32(outptr0 + 4, _sum01); - vst1q_s32(outptr0 + 8, _sum11); + vst1q_s32(outptr0 + 8, _sum10); vst1q_s32(outptr0 + 12, _sum11); r0 += 32;