Skip to content

Commit 5f7dd25

Browse files
authored
flate, zstd: Shave some bytes off amd64 matchLen (#963)
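XORQ already sets the flags (including ZF), so the separate TESTQ before JZ is unnecessary. Use a 32-bit shift (SHRL) after TZCNT/BSF: the bit index is at most 63, so it gives the same result as the 64-bit SARQ with a shorter encoding.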
1 parent 3a0faf3 commit 5f7dd25

File tree

2 files changed

+8
-12
lines changed

2 files changed

+8
-12
lines changed

flate/matchlen_amd64.s

+4-6
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
#include "textflag.h"
66

77
// func matchLen(a []byte, b []byte) int
8-
// Requires: BMI
98
TEXT ·matchLen(SB), NOSPLIT, $0-56
109
MOVQ a_base+0(FP), AX
1110
MOVQ b_base+24(FP), CX
@@ -17,17 +16,16 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56
1716
JB matchlen_match4_standalone
1817

1918
matchlen_loopback_standalone:
20-
MOVQ (AX)(SI*1), BX
21-
XORQ (CX)(SI*1), BX
22-
TESTQ BX, BX
23-
JZ matchlen_loop_standalone
19+
MOVQ (AX)(SI*1), BX
20+
XORQ (CX)(SI*1), BX
21+
JZ matchlen_loop_standalone
2422

2523
#ifdef GOAMD64_v3
2624
TZCNTQ BX, BX
2725
#else
2826
BSFQ BX, BX
2927
#endif
30-
SARQ $0x03, BX
28+
SHRL $0x03, BX
3129
LEAL (SI)(BX*1), SI
3230
JMP gen_match_len_end
3331

zstd/matchlen_amd64.s

+4-6
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
#include "textflag.h"
66

77
// func matchLen(a []byte, b []byte) int
8-
// Requires: BMI
98
TEXT ·matchLen(SB), NOSPLIT, $0-56
109
MOVQ a_base+0(FP), AX
1110
MOVQ b_base+24(FP), CX
@@ -17,17 +16,16 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56
1716
JB matchlen_match4_standalone
1817

1918
matchlen_loopback_standalone:
20-
MOVQ (AX)(SI*1), BX
21-
XORQ (CX)(SI*1), BX
22-
TESTQ BX, BX
23-
JZ matchlen_loop_standalone
19+
MOVQ (AX)(SI*1), BX
20+
XORQ (CX)(SI*1), BX
21+
JZ matchlen_loop_standalone
2422

2523
#ifdef GOAMD64_v3
2624
TZCNTQ BX, BX
2725
#else
2826
BSFQ BX, BX
2927
#endif
30-
SARQ $0x03, BX
28+
SHRL $0x03, BX
3129
LEAL (SI)(BX*1), SI
3230
JMP gen_match_len_end
3331

0 commit comments

Comments
 (0)