Skip to content

Commit 0458790

Browse files
author
Luan Cardoso
committed
Test for inline ptx
1 parent 6c80d5a commit 0458790

17 files changed

+1698
-2
lines changed

Makefile

+8-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
CXX=g++
2+
CPP=cpp
23
NVCC=nvcc -rdc=true --generate-line-info --std=c++14 --maxrregcount=128 #-Xlinker=--no-relax
34
NVOPTS=--compile
45
NVARCH= --gpu-architecture=compute_80 --gpu-code=sm_86
@@ -9,7 +10,7 @@ FR=fr fr_cpy fr_reduce4 fr_eq fr_neq fr_neg fr_x2 fr_x3 fr_x4 fr_x8 fr_x12 fr_ad
910
G1=g1a g1p g1p_compare g1p_add g1p_dbl g1p_mul g1p_neg g1p_scale g1p_ispoint g1p_sub g1p_addsub g1p_fft g1p_ptx
1011
FK20=fk20 fk20_poly2h_fft fk20_poly2toeplitz_coefficients fk20_poly2toeplitz_coefficients_fft fk20_poly2hext_fft fk20_msm fk20_hext_fft2h_fft
1112

12-
FPTEST=test fptest fptest_kat fptest_cmp fptest_mma fptest_inv fptest_add fptest_sub fptest_mul fptest_mulconst fptest_sqr fptest_distributive fptest_fibonacci
13+
FPTEST=test fptest fptest_kat fptest_cmp fptest_mma fptest_inv fptest_add fptest_sub fptest_mul fptest_mulconst fptest_sqr fptest_distributive fptest_fibonacci fp_ptx
1314
FRTEST=test frtest frtest_kat frtest_cmp frtest_add frtest_mul frtest_inv frtest_sub frtest_addsub frtest_fibonacci frtest_mulconst frtest_sqr frtest_distributive frtest_fft
1415
G1TEST=test g1test g1test_kat g1test_fibonacci g1test_dbl g1test_fft
1516
FK20TEST=test fk20test fk20test_poly fk20_testvector fk20test_fft fk20test_fft_rand
@@ -85,6 +86,12 @@ clobber: clean
8586
# Compile a CUDA source file (%.cu) into an object file with nvcc.
# NVOPTS is set to --compile in this Makefile, so the trailing -c repeats that option.
%.o: %.cu
8687
$(NVCC) $(NVOPTS) $(NVARCH) -o $@ -c $<
8788

89+
# Generate a .ptx file by running $(CPP) (set to cpp in this Makefile) over the
# matching .ptxm source: $< is the input file, $@ the output file.
%_ptx.ptx: %_ptx.ptxm
90+
$(CPP) $< $@
91+
92+
# Build an object file from the generated .ptx, using the same nvcc command line
# as the %.o: %.cu rule.
%_ptx.o: %_ptx.ptx
93+
$(NVCC) $(NVOPTS) $(NVARCH) -o $@ -c $<
94+
8895
# Link an executable from all of its prerequisites ($^) with nvcc.
# --resource-usage is passed to nvcc at this step as well.
%: %.o
8996
$(NVCC) $(NVARCH) -o $@ $^ --resource-usage
9097

fp_ptx.cuh

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
2+
// bls12_381: Arithmetic for BLS12-381
3+
// Copyright 2022-2023 Dag Arne Osvik
4+
// Copyright 2022-2023 Luan Cardoso dos Santos
5+
6+
#ifndef FP_PTX_CUH
7+
#define FP_PTX_CUH
8+
9+
#include "fp.cuh"
10+
11+
12+
// Device-side routines declared here and defined in another translation unit
// (presumably the fp_ptx PTX module built by the Makefile's %_ptx rules; TODO confirm).
// Each writes through the reference z and takes its operand(s) by const reference.
extern __device__ void fp_add_ptx(fp_t &z, const fp_t &x, const fp_t &y); // presumably z = x + y; confirm against the implementation
13+
extern __device__ void fp_sub_ptx(fp_t &z, const fp_t &x, const fp_t &y); // presumably z = x - y; confirm against the implementation
14+
extern __device__ void fp_mul_ptx(fp_t &z, const fp_t &x, const fp_t &y); // presumably z = x * y; confirm against the implementation
15+
extern __device__ void fp_sqr_ptx(fp_t &z, const fp_t &x); // single-operand variant; presumably z = x * x; confirm
16+
17+
// Single-operand device routines, declared here and defined in another translation unit.
// NOTE(review): the names suggest multiplication of x by the small constants
// 2, 3, 4, 8 and 12, with the result written to z; confirm against the implementation.
extern __device__ void fp_x2_ptx(fp_t &z, const fp_t &x); // presumably z = 2*x
18+
extern __device__ void fp_x3_ptx(fp_t &z, const fp_t &x); // presumably z = 3*x
19+
extern __device__ void fp_x4_ptx(fp_t &z, const fp_t &x); // presumably z = 4*x
20+
extern __device__ void fp_x8_ptx(fp_t &z, const fp_t &x); // presumably z = 8*x
21+
extern __device__ void fp_x12_ptx(fp_t &z, const fp_t &x); // presumably z = 12*x
22+
#endif

0 commit comments

Comments
 (0)