Skip to content

Commit 29ef21c

Browse files
Address Review Comments
Signed-off-by: Naveen Tatikonda <navtat@amazon.com>
1 parent 933de3e commit 29ef21c

File tree

1 file changed

+8
-12
lines changed

1 file changed

+8
-12
lines changed

faiss/impl/ScalarQuantizer.cpp

+8 -12
Original file line number | Diff line number | Diff line change
@@ -767,8 +767,8 @@ struct SimilarityL2<8> {
767767
float32x4_t sub0 = vsubq_f32(yiv.val[0], x.val[0]);
768768
float32x4_t sub1 = vsubq_f32(yiv.val[1], x.val[1]);
769769

770-
float32x4_t accu8_0 = vaddq_f32(accu8.val[0], vmulq_f32(sub0, sub0));
771-
float32x4_t accu8_1 = vaddq_f32(accu8.val[1], vmulq_f32(sub1, sub1));
770+
float32x4_t accu8_0 = vfmaq_f32(accu8.val[0], sub0, sub0);
771+
float32x4_t accu8_1 = vfmaq_f32(accu8.val[1], sub1, sub1);
772772

773773
float32x4x2_t accu8_temp = vzipq_f32(accu8_0, accu8_1);
774774
accu8 = vuzpq_f32(accu8_temp.val[0], accu8_temp.val[1]);
@@ -780,8 +780,8 @@ struct SimilarityL2<8> {
780780
float32x4_t sub0 = vsubq_f32(y.val[0], x.val[0]);
781781
float32x4_t sub1 = vsubq_f32(y.val[1], x.val[1]);
782782

783-
float32x4_t accu8_0 = vaddq_f32(accu8.val[0], vmulq_f32(sub0, sub0));
784-
float32x4_t accu8_1 = vaddq_f32(accu8.val[1], vmulq_f32(sub1, sub1));
783+
float32x4_t accu8_0 = vfmaq_f32(accu8.val[0], sub0, sub0);
784+
float32x4_t accu8_1 = vfmaq_f32(accu8.val[1], sub1, sub1);
785785

786786
float32x4x2_t accu8_temp = vzipq_f32(accu8_0, accu8_1);
787787
accu8 = vuzpq_f32(accu8_temp.val[0], accu8_temp.val[1]);
@@ -892,21 +892,17 @@ struct SimilarityIP<8> {
892892
float32x4x2_t yiv = vld1q_f32_x2(yi);
893893
yi += 8;
894894

895-
float32x4_t accu8_0 =
896-
vaddq_f32(accu8.val[0], vmulq_f32(yiv.val[0], x.val[0]));
897-
float32x4_t accu8_1 =
898-
vaddq_f32(accu8.val[1], vmulq_f32(yiv.val[1], x.val[1]));
895+
float32x4_t accu8_0 = vfmaq_f32(accu8.val[0], yiv.val[0], x.val[0]);
896+
float32x4_t accu8_1 = vfmaq_f32(accu8.val[1], yiv.val[1], x.val[1]);
899897
float32x4x2_t accu8_temp = vzipq_f32(accu8_0, accu8_1);
900898
accu8 = vuzpq_f32(accu8_temp.val[0], accu8_temp.val[1]);
901899
}
902900

903901
FAISS_ALWAYS_INLINE void add_8_components_2(
904902
float32x4x2_t x1,
905903
float32x4x2_t x2) {
906-
float32x4_t accu8_0 =
907-
vaddq_f32(accu8.val[0], vmulq_f32(x1.val[0], x2.val[0]));
908-
float32x4_t accu8_1 =
909-
vaddq_f32(accu8.val[1], vmulq_f32(x1.val[1], x2.val[1]));
904+
float32x4_t accu8_0 = vfmaq_f32(accu8.val[0], x1.val[0], x2.val[0]);
905+
float32x4_t accu8_1 = vfmaq_f32(accu8.val[1], x1.val[1], x2.val[1]);
910906
float32x4x2_t accu8_temp = vzipq_f32(accu8_0, accu8_1);
911907
accu8 = vuzpq_f32(accu8_temp.val[0], accu8_temp.val[1]);
912908
}

0 commit comments

Comments (0)