From d7249dafb43825dc2048a6bdce4e3c9b7606e0ae Mon Sep 17 00:00:00 2001 From: Jun He Date: Wed, 25 May 2022 22:26:41 +0800 Subject: [PATCH] common: apply two stage copy to aarch64 On aarch64, ZSTD_wildcopy uses a simple loop to do 16B-based memory copy. There is an existing optimized two-stage copy that can achieve better performance. Applying this to aarch64 as well, a ~1% uplift is observed on the silesia corpus. Signed-off-by: Jun He Change-Id: Ic1253308e7a8a7df2d08963ba544e086c81ce8be --- lib/common/zstd_internal.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 8e2b84a2365..e76b8e19d64 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -235,12 +235,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e * one COPY16() in the first call. Then, do two calls per loop since * at that point it is more likely to have a high trip count. */ -#ifdef __aarch64__ - do { - COPY16(op, ip); - } - while (op < oend); -#else ZSTD_copy16(op, ip); if (16 >= length) return; op += 16; @@ -250,7 +244,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e COPY16(op, ip); } while (op < oend); -#endif } }