-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMakefile
263 lines (187 loc) · 7.74 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
# ---- Toolchain configuration -------------------------------------------

# Host C++ compiler; no explicit rule visible in this file references it.
CXX = g++

# C preprocessor, used by the %.ptxm -> %.ptx rule.
CPP = cpp

# nvcc driver together with the flags shared by every nvcc invocation.
# Flags that were left switched off by the author:
#   -G   --generate-line-info   --maxrregcount=128
NVCC = nvcc -rdc=true --std=c++14 -Xlinker=--no-relax -DNDEBUG

# Phase selection passed to the compile rules.
NVOPTS = --compile

# Virtual and real GPU architecture to generate code for.
NVARCH = --gpu-architecture=compute_86 --gpu-code=sm_86

# Optimisation level for the rules that take $(COPTS).
COPTS = -O2
# ---- Base names of translation units -----------------------------------
# Each word is a file name without extension; the *_OBJS / *_CUBIN
# variables derived from these lists append ".o" / ".cubin".

# Library components.
FP = fp fp_cpy fp_reduce6 fp_eq fp_neq fp_neg \
     fp_x2 fp_x3 fp_x4 fp_x8 fp_x12 \
     fp_add fp_sub fp_sqr fp_mul fp_inv \
     fp_isone fp_iszero fp_nonzero fp_mma

FR = fr fr_cpy fr_reduce4 fr_eq fr_neq fr_neg \
     fr_x2 fr_x3 fr_x4 fr_x8 fr_x12 \
     fr_add fr_sub fr_sqr fr_mul fr_inv \
     fr_isone fr_iszero fr_nonzero \
     fr_roots fr_fft fr_addsub

G1 = g1a g1p g1p_compare g1p_add g1p_dbl g1p_mul g1p_neg g1p_scale \
     g1p_ispoint g1p_sub g1p_fft g1p_multi g1p_fft_accel g1p_addsub

FK20 = fk20 fk20_poly2h_fft \
       fk20_poly2toeplitz_coefficients fk20_poly2toeplitz_coefficients_fft \
       fk20_poly2hext_fft fk20_msm \
       fk20_hext_fft2h_fft fk20_hext_fft2h_fft_512

# Test programs: each list names the objects linked into one executable
# (in addition to the library objects added by the link rules).
FPTEST = test fptest fptest_kat fptest_cmp fptest_mma fptest_inv \
         fptest_add fptest_sub fptest_mul fptest_mulconst fptest_sqr \
         fptest_distributive fptest_fibonacci fp_ptx fpPTXtest

FRTEST = test frtest frtest_kat frtest_cmp frtest_add frtest_mul \
         frtest_inv frtest_sub frtest_addsub frtest_fibonacci \
         frtest_mulconst frtest_sqr frtest_distributive frtest_fft

G1TEST = test g1test g1test_kat g1test_fibonacci g1test_dbl g1test_fft

G1TEST_PTX = test g1ptest_ptx g1p_ptx

G1P_FFT_ACCEL = g1p_fft_accel

# Disabled entry: fk20test_fft_rand
FK20TEST = test fk20test fk20test_poly fk20_testvector fk20test_fft

FK20TEST_TC = test fk20test_poly2toeplitz_coefficients \
              polynomial toeplitz_coefficients

FK20TEST_TCFFT = test fk20test_poly2toeplitz_coefficients_fft \
                 polynomial toeplitz_coefficients_fft

FFTTEST = fftTest parseFFTTest

# Disabled entries: xext_fft polynomial toeplitz_coefficients
#                   toeplitz_coefficients_fft hext_fft h h_fft
FK20_512TEST = test fk20_512test

FK20_512TEST_BOOTSTRAP = test fk20_512test_bootstrap xext_fft polynomial \
                         toeplitz_coefficients toeplitz_coefficients_fft \
                         hext_fft h h_fft

FK20BENCHMARK = fk20benchmark fk20_testvector

FK20PROFILE = fk20profile fk20_testvector
# ---- Derived file lists --------------------------------------------------
# Every list below is obtained from a base-name list by appending a suffix.
# The assignments stay recursive (=) so they follow later changes to the
# base-name lists.

# Library objects.
FP_OBJS=$(FP:%=%.o)
FR_OBJS=$(FR:%=%.o)
G1_OBJS=$(G1:%=%.o)
FK20_OBJS=$(FK20:%=%.o)

# Stand-alone cubin images of the library components.
FP_CUBIN=$(FP:%=%.cubin)
FR_CUBIN=$(FR:%=%.cubin)
G1_CUBIN=$(G1:%=%.cubin)
FK20_CUBIN=$(FK20:%=%.cubin)

# Objects of the individual test / benchmark programs.
FPTEST_OBJS=$(FPTEST:%=%.o)
FRTEST_OBJS=$(FRTEST:%=%.o)
G1TEST_OBJS=$(G1TEST:%=%.o)
G1TEST_PTX_OBJS=$(G1TEST_PTX:%=%.o)
G1P_FFT_ACCEL_OBJS=$(G1P_FFT_ACCEL:%=%.o)
FK20TEST_OBJS=$(FK20TEST:%=%.o)
FK20TEST_TC_OBJS=$(FK20TEST_TC:%=%.o)
FK20TEST_TCFFT_OBJS=$(FK20TEST_TCFFT:%=%.o)
FFTTEST_OBJS=$(FFTTEST:%=%.o)
FK20_512TEST_OBJS=$(FK20_512TEST:%=%.o)
FK20_512TEST_BOOTSTRAP_OBJS=$(FK20_512TEST_BOOTSTRAP:%=%.o)
# Misspelled historical name, kept as an alias because the
# fk20_512test_bootstrap link rule refers to it.
FK20_512TEST_BOOSTRAP_OBJS=$(FK20_512TEST_BOOTSTRAP_OBJS)
FK20BENCHMARK_OBJS=$(FK20BENCHMARK:%=%.o)
FK20PROFILE_OBJS=$(FK20PROFILE:%=%.o)

# Aggregates.
OBJS=$(FP_OBJS) $(FR_OBJS) $(G1_OBJS) $(FK20_OBJS)
CUBIN=$(FP_CUBIN) $(FR_CUBIN) $(G1_CUBIN) $(FK20_CUBIN)

# All objects that belong to a test/benchmark program. The `clean` recipe
# removes this list, so every program's objects must appear here or they
# survive a clean.
TEST_OBJS=$(FPTEST_OBJS) $(FRTEST_OBJS) $(G1TEST_OBJS) $(G1TEST_PTX_OBJS) \
          $(FK20TEST_OBJS) $(FK20TEST_TC_OBJS) $(FK20TEST_TCFFT_OBJS) \
          $(FFTTEST_OBJS) $(FK20_512TEST_OBJS) $(FK20_512TEST_BOOTSTRAP_OBJS) \
          $(FK20BENCHMARK_OBJS) $(FK20PROFILE_OBJS)
# These goals are commands, not files: declare them phony so a stray file
# named e.g. "all" or "debug" can never make them look up to date.
.PHONY: all debug profile run

# Default goal (first rule in the file): build the test, profiling and
# benchmark executables listed here.
all: fptest frtest g1test fk20test ffttest fk20_512test \
     fk20test_poly2toeplitz_coefficients fk20test_poly2toeplitz_coefficients_fft \
     fk20profile fk20benchmark

# Add some debug flags.
# The recipe contains no shell command; $(eval) appends to NVCC when the
# recipe is expanded. The extra flags therefore only affect goals that are
# built after `debug` in the same invocation, e.g. `make debug fptest`.
debug:
	$(eval NVCC += -g -G --maxrregcount=128 -DDEBUG)

# Same mechanism as `debug`, but only adds line information.
profile:
	$(eval NVCC += --generate-line-info)

# Build and execute the four main test programs.
run: fp-run fr-run g1-run fk20-run
# Convenience goals that build a test executable and then execute it.
# They never produce a file of their own name, hence phony.
.PHONY: fp-run fr-run g1-run fk20-run g1p_fft_accel-run fk20_512test-run

fp-run: fptest
	./$<

fr-run: frtest
	./$<

g1-run: g1test
	./$<

fk20-run: fk20test
	./$<

g1p_fft_accel-run: g1p_fft_accel
	./$<

fk20_512test-run: fk20_512test
	./$<
# Housekeeping goals; none of them creates a file with its own name.
.PHONY: cubin clean shallowclean clobber

# Build a cubin image for every library component.
cubin: $(CUBIN)

# Remove all objects and cubins, plus the generated test-vector sources.
# The leading "-" lets make carry on even if rm reports an error.
clean:
	-rm -f $(OBJS) $(TEST_OBJS) $(CUBIN)
	-rm -f xext_fft.cu polynomial.cu toeplitz_coefficients.cu toeplitz_coefficients_fft.cu hext_fft.cu h.cu h_fft.cu

# Remove only the library objects and cubins; test objects are kept.
shallowclean:
	@(echo "Removing only objects that are fast to compile!")
	-rm -f $(OBJS) $(CUBIN)

# clean, then also remove every executable this Makefile can link
# (including those built by `all`: ffttest, fk20profile, fk20benchmark).
clobber: clean
	-rm -f fptest frtest g1test fk20test fk20_512test fk20test_poly2toeplitz_coefficients fk20test_poly2toeplitz_coefficients_fft
	-rm -f ffttest fk20profile fk20benchmark fptest_ptx g1test_ptx g1p_fft_accel fk20_512test_bootstrap
# ---- Generic pattern rules -----------------------------------------------
# The rules are kept in their original order because make's implicit-rule
# search is order-sensitive.

# .ptxm -> .ptx: run the C preprocessor into a hidden scratch file, let sed
# replace every occurrence of the word "newline" (with GNU sed the
# replacement is a line break followed by a tab), then drop the scratch file.
%.ptx: %.ptxm
	$(CPP) $< .$@.ptx-tmp && \
	sed s/newline/\\n\\t/g .$@.ptx-tmp > $@ && \
	rm -f .$@.ptx-tmp

# .ptx -> .cubin
%.cubin: %.ptx
	$(NVCC) $(NVOPTS) $(NVARCH) -o $@ -c $< -cubin

# .cu -> .ptx
%.ptx: %.cu
	$(NVCC) $(NVOPTS) $(NVARCH) -o $@ -c $< -ptx

# .ptx -> .o
%.o: %.ptx
	$(NVCC) $(NVOPTS) $(NVARCH) -o $@ -c $<

# .cu -> .o
%.o: %.cu
	$(NVCC) $(NVOPTS) $(NVARCH) -o $@ -c $<

# Fallback link rule: an executable from the single object of the same
# name; also asks nvcc to report resource usage.
%: %.o
	$(NVCC) $(NVARCH) -o $@ $^ --resource-usage
# ---- Per-family compile rules --------------------------------------------

# Command shared by every rule in this section: compile the first
# prerequisite into the target for the configured GPU architecture.
cu_to_obj_arch = $(NVCC) $(NVOPTS) $(NVARCH) -o $@ -c $<

# Additional prerequisites only; the recipe comes from a pattern rule.
g1p_ptx.o: g1p_ptx.ptx
fp_add.o: fp_add.cu fp_add.cuh
fp_addsub.o: fp_addsub.cu fp_add.cuh

# fp.o and fr.o are spelled out because "%" in the family patterns below
# never matches an empty stem.
fp.o fr.o: %.o: %.cu %.cuh
	$(cu_to_obj_arch)

# Family rules: besides its own source, each object depends on the
# header(s) of its family.
fp%.o: fp%.cu fp.cuh
	$(cu_to_obj_arch)

fr%.o: fr%.cu fr.cuh
	$(cu_to_obj_arch)

g1%.o: g1%.cu g1.cuh fp.cuh fr.cuh
	$(cu_to_obj_arch)

fptest_%.o: fptest_%.cu fptest.cuh
	$(cu_to_obj_arch)

frtest_%.o: frtest_%.cu frtest.cuh
	$(cu_to_obj_arch)
# ---- Test driver objects --------------------------------------------------
# Header dependencies of the four test drivers.
fptest.o: fp.cuh
frtest.o: fr.cuh
g1test.o: g1.cuh fp.cuh fr.cuh
fk20test.o: fk20.cuh g1.cuh fp.cuh fr.cuh

# Unlike the library objects, the drivers are compiled without $(NVARCH).
# The static pattern rule carries the recipe, so the matching .cu file is
# always the first prerequisite ($<).
fptest.o frtest.o g1test.o fk20test.o: %.o: %.cu
	$(NVCC) $(NVOPTS) -o $@ -c $<
# Plain C helper of the FFT test, compiled with the host C compiler and
# full debug information.
parseFFTTest.o: parseFFTTest.c
	gcc -g3 -ggdb $(COPTS) -o $@ -c $<

# Object of the FFT test driver.
# The target is spelled fftTest.o because that is the name $(FFTTEST_OBJS)
# asks for; the previous lowercase spelling "ffttest.o" never matched it on
# a case-sensitive file system, so this rule and its header prerequisites
# were silently bypassed in favour of the generic %.o: %.cu rule.
# $(NVARCH) is passed so the object is still compiled for the same
# architecture that the generic rule used.
# NOTE(review): parseFFTTest.c is listed as a prerequisite, presumably
# because fftTest.cu depends on its contents; confirm.
fftTest.o: fftTest.cu fk20.cuh g1.cuh fp.cuh fr.cuh parseFFTTest.c
	$(NVCC) $(COPTS) $(NVARCH) -o $@ -c $<
# ---- Test executables -------------------------------------------------------
# What each executable is linked from. The recipes follow further down and
# are shared between executables.
fptest: $(FPTEST_OBJS) $(FP_OBJS)
# fptest_ptx takes its driver as a .cu source directly. Further candidate
# inputs noted by the author: fp_x2.ptx fp_x3.ptx fp_x4.ptx fp_x8.ptx
# fp_x12.ptx fp_add.ptx fp_sub.ptx fp_sqr.ptx fp_mul.ptx fp_reduce12.ptx
fptest_ptx: fptest_ptx.cu $(FP_OBJS) fp_ptx.o
frtest: $(FRTEST_OBJS) $(FR_OBJS)
g1test: $(G1TEST_OBJS) $(OBJS)
g1test_ptx: $(G1TEST_PTX_OBJS) $(FP_OBJS) $(FR_OBJS) $(G1_OBJS)
g1p_fft_accel: $(G1P_FFT_ACCEL_OBJS) $(FP_OBJS) $(FR_OBJS) $(G1_OBJS)
fk20test: $(FK20TEST_OBJS) $(OBJS)
ffttest: $(FFTTEST_OBJS) $(OBJS)
fk20test_poly2toeplitz_coefficients: $(FK20TEST_TC_OBJS) $(OBJS)
fk20test_poly2toeplitz_coefficients_fft: $(FK20TEST_TCFFT_OBJS) $(OBJS)

# Driver object of g1test_ptx; compiled with $(COPTS) and without $(NVARCH).
g1ptest_ptx.o: g1ptest_ptx.cu
	$(NVCC) $(COPTS) -o $@ -c $<

# Standard link step. The trailing "# --resource-usage" reaches the shell,
# where it is a comment.
fptest fptest_ptx frtest g1test g1test_ptx g1p_fft_accel fk20test ffttest:
	$(NVCC) $(NVARCH) -o $@ $^ # --resource-usage

# Link step with -G added.
fk20test_poly2toeplitz_coefficients fk20test_poly2toeplitz_coefficients_fft:
	$(NVCC) $(NVARCH) -o $@ $^ -G # --resource-usage
# Aggregate goals without a recipe. They are declared phony so make does
# not try implicit rules (such as "%: %.o") on them and never looks for
# files with these names.
.PHONY: fk20_512test_objs fk20_objs

# Use this rule to generate the large objects without debug symbols.
fk20_512test_objs: $(FK20_512TEST_OBJS)

# Use this rule to remake the library objects.
fk20_objs: $(OBJS)
# ---- FK20 512-row test, benchmark and profiling executables ---------------
# Inputs of each executable: its own objects plus the whole library.
fk20_512test: $(FK20_512TEST_OBJS) $(OBJS)
fk20_512test_bootstrap: $(FK20_512TEST_BOOSTRAP_OBJS) $(OBJS)
fk20benchmark: $(FK20BENCHMARK_OBJS) $(OBJS)
fk20profile: $(FK20PROFILE_OBJS) $(OBJS)

# One link recipe for all four. The trailing "# --resource-usage" reaches
# the shell, where it is a comment.
fk20_512test fk20_512test_bootstrap fk20benchmark fk20profile:
	$(NVCC) $(NVARCH) -o $@ $^ # --resource-usage
# ---- Stand-alone cubin images ------------------------------------------------

# Command shared by the three family rules below: compile the first
# prerequisite straight to a cubin for the configured GPU architecture.
cu_to_cubin = $(NVCC) $(NVOPTS) $(NVARCH) -o $@ -c $< -cubin

# Same header dependencies as the corresponding object-file rules.
fp%.cubin: fp%.cu fp.cuh
	$(cu_to_cubin)

fr%.cubin: fr%.cu fr.cuh
	$(cu_to_cubin)

g1%.cubin: g1%.cu g1.cuh fp.cuh fr.cuh
	$(cu_to_cubin)
##############################
#
# Test vector generation
#
##############################
# Words printed by ./512.sh, captured once at parse time.
# NOTE(review): presumably the row indices 0..511; confirm against 512.sh.
512:=$(shell ./512.sh)

# Rule template for one test-vector row, instantiated once per index $(1).
# The first prerequisite (the Python generator) is executed with the
# arguments "1 <index>" and its standard output becomes the target file.
# Each instantiation also appends its target to ALL_ROWS.
define ROW_template =
test/fk20test-fib-1-$(1).cu: FK20Py/fk20_multi_cuda.py FK20Py/fk20_single_cuda.py
	mkdir -p $$(@D)
	$$< 1 $(1) > $$@
ALL_ROWS += test/fk20test-fib-1-$(1).cu
endef

# The variable name has more than one character, so it must be referenced
# as $(512). A bare "$512" is read by make as "$(5)" followed by the text
# "12", which made this loop run once, for the single word "12".
$(foreach i,$(512),$(eval $(call ROW_template,$(i))))

# Generate every test-vector row.
.PHONY: testvector
testvector: $(ALL_ROWS)
# ---- Generated test-vector sources -----------------------------------------
# Every recipe here redirects straight into its target. If the generating
# command fails, the partial file is deleted again so that an incomplete
# but newer-looking target cannot satisfy later builds.

# xext_fft.cu: an include line for g1.cuh, an empty line, then the part of
# row 0 that grep selects around "xext_fft" (1 line before, 40993 after).
xext_fft.cu: test/fk20test-fib-1-0.cu
	(echo \#include \"g1.cuh\"; echo; grep -A 40993 -B1 xext_fft $< ) > $@ || { rm -f $@; exit 1; }

# The remaining sources are each written by a script named after the
# target (polynomial.cu <- ./polynomial.sh, h.cu <- ./h.sh, ...) and are
# regenerated whenever any test-vector row changes.
polynomial.cu toeplitz_coefficients.cu toeplitz_coefficients_fft.cu hext_fft.cu h.cu h_fft.cu: %.cu: $(ALL_ROWS)
	./$*.sh > $@ || { rm -f $@; exit 1; }