/*
 * Patch summary (leading text before the first "diff --git" header; patch
 * tools skip it).
 *
 * ref/poly.c, poly_compress:
 *   - adds a uint32_t scratch variable d0;
 *   - 4-bit path: the rounding division by KYBER_Q is replaced by
 *     shift-left 4, add 1665, multiply by 80635, shift-right 28, mask 0xf;
 *   - 5-bit path: shift-left 5, add 1664, multiply by 40318,
 *     shift-right 27, mask 0x1f;
 *   - the previous division expressions are kept as commented-out lines.
 *
 * ref/polyvec.c, polyvec_compress:
 *   - adds a uint64_t scratch variable d0;
 *   - 11-bit path: shift-left 11, add 1664, multiply by 645084,
 *     shift-right 31, mask 0x7ff;
 *   - 10-bit path: shift-left 10, add 1665, multiply by 1290167,
 *     shift-right 32, mask 0x3ff;
 *   - the previous division expressions are kept as commented-out lines.
 *
 * ref/test_kyber.c, ref/test_speed.c:
 *   - empty parameter lists "()" become "(void)" on test_keys,
 *     test_invalid_sk_a, test_invalid_ciphertext and main.
 *
 * runtests.sh:
 *   - the export that prepends -fsanitize=address,undefined to CFLAGS is
 *     commented out;
 *   - the valgrind invocation of test_kyber$alg is uncommented, so that
 *     binary runs once under valgrind and once in the background.
 *
 * NOTE(review): the multipliers look like rounded values of 2^shift divided
 *   by KYBER_Q, assuming KYBER_Q is 3329 (its definition is not part of this
 *   patch) - TODO confirm each multiply-and-shift matches the old division
 *   for every coefficient in [0, KYBER_Q).
 * NOTE(review): two paths add 1665 and two add 1664, where the old code added
 *   KYBER_Q/2 in all four; presumably this pairs with whether the multiplier
 *   was rounded down or up - confirm.
 * NOTE(review): presumably the goal is to avoid a division whose timing can
 *   depend on secret data; the patch itself does not state a motivation.
 * NOTE(review): the sanitizer flags are disabled rather than deleted; confirm
 *   whether dropping them from the test run is meant to be permanent.
 */
diff --git a/ref/poly.c b/ref/poly.c index 017cacf5..3e73579e 100644 --- a/ref/poly.c +++ b/ref/poly.c @@ -19,6 +19,7 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) { unsigned int i,j; int16_t u; + uint32_t d0; uint8_t t[8]; #if (KYBER_POLYCOMPRESSEDBYTES == 128) @@ -27,7 +28,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) // map to positive standard representatives u = a->coeffs[8*i+j]; u += (u >> 15) & KYBER_Q; - t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; +/* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */ + d0 = u << 4; + d0 += 1665; + d0 *= 80635; + d0 >>= 28; + t[j] = d0 & 0xf; } r[0] = t[0] | (t[1] << 4); @@ -42,7 +48,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) // map to positive standard representatives u = a->coeffs[8*i+j]; u += (u >> 15) & KYBER_Q; - t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; +/* t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */ + d0 = u << 5; + d0 += 1664; + d0 *= 40318; + d0 >>= 27; + t[j] = d0 & 0x1f; } r[0] = (t[0] >> 0) | (t[1] << 5); diff --git a/ref/polyvec.c b/ref/polyvec.c index 8420d069..669f6a5f 100644 --- a/ref/polyvec.c +++ b/ref/polyvec.c @@ -15,6 +15,7 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) { unsigned int i,j,k; + uint64_t d0; #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) uint16_t t[8]; @@ -23,7 +24,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) for(k=0;k<8;k++) { t[k] = a->vec[i].coeffs[8*j+k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; +/* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */ + d0 = t[k]; + d0 <<= 11; + d0 += 1664; + d0 *= 645084; + d0 >>= 31; + t[k] = d0 & 0x7ff; } r[ 0] = (t[0] >> 0); @@ -47,7 +54,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) for(k=0;k<4;k++) { 
t[k] = a->vec[i].coeffs[4*j+k]; t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; - t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; +/* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */ + d0 = t[k]; + d0 <<= 10; + d0 += 1665; + d0 *= 1290167; + d0 >>= 32; + t[k] = d0 & 0x3ff; } r[0] = (t[0] >> 0); diff --git a/ref/test_kyber.c b/ref/test_kyber.c index 0f28af55..cac6b417 100644 --- a/ref/test_kyber.c +++ b/ref/test_kyber.c @@ -6,7 +6,7 @@ #define NTESTS 1000 -static int test_keys() +static int test_keys(void) { uint8_t pk[CRYPTO_PUBLICKEYBYTES]; uint8_t sk[CRYPTO_SECRETKEYBYTES]; @@ -31,7 +31,7 @@ static int test_keys() return 0; } -static int test_invalid_sk_a() +static int test_invalid_sk_a(void) { uint8_t pk[CRYPTO_PUBLICKEYBYTES]; uint8_t sk[CRYPTO_SECRETKEYBYTES]; @@ -59,7 +59,7 @@ static int test_invalid_sk_a() return 0; } -static int test_invalid_ciphertext() +static int test_invalid_ciphertext(void) { uint8_t pk[CRYPTO_PUBLICKEYBYTES]; uint8_t sk[CRYPTO_SECRETKEYBYTES]; diff --git a/ref/test_speed.c b/ref/test_speed.c index 09cb6556..d10cb24f 100644 --- a/ref/test_speed.c +++ b/ref/test_speed.c @@ -16,7 +16,7 @@ uint64_t t[NTESTS]; uint8_t seed[KYBER_SYMBYTES] = {0}; -int main() +int main(void) { unsigned int i; uint8_t pk[CRYPTO_PUBLICKEYBYTES]; diff --git a/runtests.sh b/runtests.sh index ae0fbaea..f844a9d5 100755 --- a/runtests.sh +++ b/runtests.sh @@ -11,13 +11,13 @@ fi if [ "$ARCH" = "amd64" -o "$ARCH" = "arm64" ]; then export CC="clang" - export CFLAGS="-fsanitize=address,undefined ${CFLAGS}" +# export CFLAGS="-fsanitize=address,undefined ${CFLAGS}" fi for dir in $DIRS; do make -j$(nproc) -C $dir for alg in 512 768 1024 512-90s 768-90s 1024-90s; do - #valgrind --vex-guest-max-insns=25 ./$dir/test_kyber$alg + valgrind --vex-guest-max-insns=25 ./$dir/test_kyber$alg ./$dir/test_kyber$alg & PID1=$! ./$dir/test_kex$alg &