Skip to content

Commit

Permalink
fix convolutiondepthwise 3x3 int8 pack8, fix #2952
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui committed May 25, 2021
1 parent a3b1377 commit f6c4952
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions src/layer/arm/convolutiondepthwise_3x3_pack8_int8.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -36,7 +36,7 @@ static void convdw3x3s1_pack8_int8_neon(const Mat& bottom_blob, Mat& top_blob, c
const signed char* r0 = img0.row<const signed char>(0);
const signed char* r1 = img0.row<const signed char>(1);
const signed char* r2 = img0.row<const signed char>(2);
- const signed char* r3 = img0.row<const signed char>(2);
+ const signed char* r3 = img0.row<const signed char>(3);

int8x8_t _k00 = vld1_s8(k0);
int8x8_t _k01 = vld1_s8(k0 + 8);
Expand Down Expand Up @@ -149,6 +149,7 @@ static void convdw3x3s1_pack8_int8_neon(const Mat& bottom_blob, Mat& top_blob, c
r0 += 16;
r1 += 16;
r2 += 16;
+ r3 += 16;
outptr0 += 16;
outptr1 += 16;
}
Expand Down Expand Up @@ -220,6 +221,9 @@ static void convdw3x3s1_pack8_int8_neon(const Mat& bottom_blob, Mat& top_blob, c
r1 += 2 * 8 + w * 8;
r2 += 2 * 8 + w * 8;
r3 += 2 * 8 + w * 8;

+ outptr0 += outw * 8;
+ outptr1 += outw * 8;
}
for (; i < outh; i++)
{
Expand Down Expand Up @@ -422,7 +426,7 @@ static void convdw3x3s2_pack8_int8_neon(const Mat& bottom_blob, Mat& top_blob, c

vst1q_s32(outptr0, _sum00);
vst1q_s32(outptr0 + 4, _sum01);
- vst1q_s32(outptr0 + 8, _sum11);
+ vst1q_s32(outptr0 + 8, _sum10);
vst1q_s32(outptr0 + 12, _sum11);

r0 += 32;
Expand Down

0 comments on commit f6c4952

Please sign in to comment.